From c90a27746ffa0c3d811e43b640b7c671ea3fc60e Mon Sep 17 00:00:00 2001 From: Imbris Date: Sun, 14 Apr 2024 13:34:01 -0400 Subject: [PATCH] Allow unconsumed inputs in fragment shaders by removing them from vertex outputs when generating HLSL. Fixes https://github.com/gfx-rs/wgpu/issues/3748 * Add naga::back::hlsl::FragmentEntryPoint for providing information about the fragment entry point when generating vertex entry points via naga::back::hlsl::Writer::write. Vertex outputs not consumed by the fragment entry point are omitted in the final output struct. * Add naga snapshot test for this new feature, * Remove Features::SHADER_UNUSED_VERTEX_OUTPUT, StageError::InputNotConsumed, and associated validation logic. * Make wgpu dx12 backend pass fragment shader info when generating vertex HLSL. * Add wgpu regression test for allowing unconsumed inputs. --- deno_webgpu/lib.rs | 7 -- naga-cli/src/bin/naga.rs | 1 + naga/src/back/hlsl/mod.rs | 29 ++++++++ naga/src/back/hlsl/writer.rs | 72 +++++++++++++++++-- .../unconsumed_vertex_outputs_frag.param.ron | 2 + .../in/unconsumed_vertex_outputs_frag.wgsl | 13 ++++ .../unconsumed_vertex_outputs_vert.param.ron | 2 + .../in/unconsumed_vertex_outputs_vert.wgsl | 13 ++++ .../hlsl/unconsumed_vertex_outputs_frag.hlsl | 17 +++++ .../hlsl/unconsumed_vertex_outputs_frag.ron | 12 ++++ .../hlsl/unconsumed_vertex_outputs_vert.hlsl | 30 ++++++++ .../hlsl/unconsumed_vertex_outputs_vert.ron | 12 ++++ naga/tests/snapshots.rs | 51 +++++++++++-- tests/tests/gpu.rs | 1 + tests/tests/regression/issue_3748.rs | 50 +++++++++++++ tests/tests/regression/issue_3748.wgsl | 23 ++++++ wgpu-core/src/device/resource.rs | 3 +- wgpu-core/src/validation.rs | 49 +++++-------- wgpu-hal/src/dx12/device.rs | 26 +++++-- wgpu-hal/src/gles/adapter.rs | 1 - wgpu-hal/src/metal/adapter.rs | 1 - wgpu-hal/src/vulkan/adapter.rs | 1 - wgpu-types/src/lib.rs | 9 --- 23 files changed, 355 insertions(+), 70 deletions(-) create mode 100644 naga/tests/in/unconsumed_vertex_outputs_frag.param.ron create mode 100644 naga/tests/in/unconsumed_vertex_outputs_frag.wgsl create mode 100644 naga/tests/in/unconsumed_vertex_outputs_vert.param.ron create mode 100644 naga/tests/in/unconsumed_vertex_outputs_vert.wgsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron create mode 100644 tests/tests/regression/issue_3748.rs create mode 100644 tests/tests/regression/issue_3748.wgsl diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index d3b924bf7f..ef1e376a91 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -363,9 +363,6 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::SHADER_EARLY_DEPTH_TEST) { return_features.push("shader-early-depth-test"); } - if features.contains(wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT) { - return_features.push("shader-unused-vertex-output"); - } return_features } @@ -632,10 +629,6 @@ impl From for wgpu_types::Features { wgpu_types::Features::SHADER_EARLY_DEPTH_TEST, required_features.0.contains("shader-early-depth-test"), ); - features.set( - wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT, - required_features.0.contains("shader-unused-vertex-output"), - ); features } diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index 3b0873a376..e519e67c9f 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -604,6 +604,7 @@ fn write_output( "Generating hlsl output requires validation to \ succeed, and it failed in a previous step", ))?, + None, ) .unwrap_pretty(); fs::write(output_path, buffer)?; diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index f3a6f9106c..ff530f8b4a 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -268,6 +268,35 @@ impl Wrapped { } } +/// A fragment entry point to be considered when generating HLSL for the output interface of vertex +/// entry points. +/// +/// This is provided as an optional paramter to [`Writer::write`]. +/// +/// If this is provided, vertex outputs will be removed if they are not inputs of this fragment +/// entry point. This is necessary for generating correct HLSL when some of the vertex shader +/// outputs are not consumed by the fragment shader. +pub struct FragmentEntryPoint<'a> { + module: &'a crate::Module, + func: &'a crate::Function, +} + +impl<'a> FragmentEntryPoint<'a> { + /// Returns `None` if the entry point with the provided name can't be found or isn't a fragment + /// entry point. + pub fn new(module: &'a crate::Module, ep_name: &'a str) -> Option { + module + .entry_points + .iter() + .find(|ep| ep.name == ep_name) + .filter(|ep| ep.stage == crate::ShaderStage::Fragment) + .map(|ep| Self { + module, + func: &ep.function, + }) + } +} + pub struct Writer<'a, W> { out: W, names: crate::FastHashMap, diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index f26604476a..32fe510452 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -1,7 +1,7 @@ use super::{ help::{WrappedArrayLength, WrappedConstructor, WrappedImageQuery, WrappedStructMatrixAccess}, storage::StoreValue, - BackendResult, Error, Options, + BackendResult, Error, FragmentEntryPoint, Options, }; use crate::{ back, @@ -23,6 +23,7 @@ pub(crate) const FREXP_FUNCTION: &str = "naga_frexp"; struct EpStructMember { name: String, ty: Handle, + // TODO: log error if binding is none? // technically, this should always be `Some` binding: Option, index: u32, @@ -183,6 +184,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { &mut self, module: &Module, module_info: &valid::ModuleInfo, + fragment_entry_point: Option<&FragmentEntryPoint<'_>>, ) -> Result { self.reset(module); @@ -278,7 +280,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // Write all entry points wrapped structs for (index, ep) in module.entry_points.iter().enumerate() { let ep_name = self.names[&NameKey::EntryPoint(index as u16)].clone(); - let ep_io = self.write_ep_interface(module, &ep.function, ep.stage, &ep_name)?; + let ep_io = self.write_ep_interface( + module, + &ep.function, + ep.stage, + &ep_name, + fragment_entry_point, + )?; self.entry_point_io.push(ep_io); } @@ -472,6 +480,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, "}};")?; writeln!(self.out)?; + // See ordering notes on EntryPointInterface fields match shader_stage.1 { Io::Input => { // bring back the original order @@ -505,6 +514,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for arg in func.arguments.iter() { match module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { + // TODO: what about nested structs? Is that possible? Maybe try an unwrap on + // the binding? for member in members.iter() { let name = self.namer.call_or(&member.name, "member"); let index = fake_members.len() as u32; @@ -541,10 +552,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result: &crate::FunctionResult, stage: ShaderStage, entry_point_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { let struct_name = format!("{stage:?}Output_{entry_point_name}"); - let mut fake_members = Vec::new(); let empty = []; let members = match module.types[result.ty].inner { TypeInner::Struct { ref members, .. } => members, @@ -554,14 +565,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } }; - for member in members.iter() { + // Gather list of fragment input locations. We use this below to remove user-defined + // varyings from VS outputs that aren't in the FS inputs. This makes the VS interface match + // as long as the FS inputs are a subset of the VS outputs. This is only applied if the + // writer is supplied with information about the fragment entry point. + let fs_input_locs = if let (Some(frag_ep), ShaderStage::Vertex) = (frag_ep, stage) { + let mut fs_input_locs = Vec::new(); + for arg in frag_ep.func.arguments.iter() { + let mut push_if_location = |binding: &Option| { + match *binding { + Some(crate::Binding::Location { location, .. }) => { + fs_input_locs.push(location) + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + }; + match frag_ep.module.types[arg.ty].inner { + TypeInner::Struct { ref members, .. } => { + // TODO: nesting? + for member in members.iter() { + push_if_location(&member.binding); + } + } + _ => push_if_location(&arg.binding), + } + } + fs_input_locs.sort(); + Some(fs_input_locs) + } else { + None + }; + + let mut fake_members = Vec::new(); + for (index, member) in members.iter().enumerate() { + if let Some(ref fs_input_locs) = fs_input_locs { + match member.binding { + Some(crate::Binding::Location { location, .. }) => { + if fs_input_locs.binary_search(&location).is_err() { + continue; + } + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + } + let member_name = self.namer.call_or(&member.name, "member"); - let index = fake_members.len() as u32; fake_members.push(EpStructMember { name: member_name, ty: member.ty, binding: member.binding.clone(), - index, + index: index as u32, }); } @@ -577,6 +634,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, stage: ShaderStage, ep_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { Ok(EntryPointInterface { input: if !func.arguments.is_empty() && stage == ShaderStage::Fragment { @@ -586,7 +644,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }, output: match func.result { Some(ref fr) if fr.binding.is_none() && stage == ShaderStage::Vertex => { - Some(self.write_ep_output_struct(module, fr, stage, ep_name)?) + Some(self.write_ep_output_struct(module, fr, stage, ep_name, frag_ep)?) } _ => None, }, diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron new file mode 100644 index 0000000000..72873dd667 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl new file mode 100644 index 0000000000..3a656c9696 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct FragmentIn { + @location(1) value: f32, + @location(3) value2: f32, + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + // @location(2) unused_value2: vec4, +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value, v_out.value, v_out.value2, v_out.value2); +} diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron new file mode 100644 index 0000000000..72873dd667 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl new file mode 100644 index 0000000000..46c39ea930 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct VertexOut { + @builtin(position) position: vec4, + @location(1) value: f32, + @location(2) unused_value2: vec4, + @location(0) unused_value: f32, + @location(3) value2: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, vec4(2.0), 1.0, 0.5); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl new file mode 100644 index 0000000000..4005e43538 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl @@ -0,0 +1,17 @@ +struct FragmentIn { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +struct FragmentInput_fs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 +{ + FragmentIn v_out = { fragmentinput_fs_main.value, fragmentinput_fs_main.value2_, fragmentinput_fs_main.position }; + return float4(v_out.value, v_out.value, v_out.value2_, v_out.value2_); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron new file mode 100644 index 0000000000..eac1b945d2 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"fs_main", + target_profile:"ps_5_1", + ), + ], + compute:[ + ], +) diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl new file mode 100644 index 0000000000..ea75d63877 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl @@ -0,0 +1,30 @@ +struct VertexOut { + float4 position : SV_Position; + float value : LOC1; + float4 unused_value2_ : LOC2; + float unused_value : LOC0; + float value2_ : LOC3; +}; + +struct VertexOutput_vs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +VertexOut ConstructVertexOut(float4 arg0, float arg1, float4 arg2, float arg3, float arg4) { + VertexOut ret = (VertexOut)0; + ret.position = arg0; + ret.value = arg1; + ret.unused_value2_ = arg2; + ret.unused_value = arg3; + ret.value2_ = arg4; + return ret; +} + +VertexOutput_vs_main vs_main() +{ + const VertexOut vertexout = ConstructVertexOut((1.0).xxxx, 1.0, (2.0).xxxx, 1.0, 0.5); + const VertexOutput_vs_main vertexout_1 = { vertexout.value, vertexout.value2_, vertexout.position }; + return vertexout_1; +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron new file mode 100644 index 0000000000..a24f8d0eb8 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ( + entry_point:"vs_main", + target_profile:"vs_5_1", + ), + ], + fragment:[ + ], + compute:[ + ], +) diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index ed75805ae0..71dbbd9e96 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -248,13 +248,24 @@ impl Input { } } +#[cfg(feature = "hlsl-out")] +type FragmentEntryPoint<'a> = naga::back::hlsl::FragmentEntryPoint<'a>; +#[cfg(not(feature = "hlsl-out"))] +type FragmentEntryPoint<'a> = (); + #[allow(unused_variables)] fn check_targets( input: &Input, module: &mut naga::Module, targets: Targets, source_code: Option<&str>, + // For testing hlsl generation when fragment shader doesn't consume all vertex outputs. + frag_ep: Option, ) { + if frag_ep.is_some() && !targets.contains(Targets::HLSL) { + panic!("Providing FragmentEntryPoint only makes sense when testing hlsl-out"); + } + let params = input.read_parameters(); let name = &input.file_name; @@ -374,7 +385,7 @@ fn check_targets( #[cfg(all(feature = "deserialize", feature = "hlsl-out"))] { if targets.contains(Targets::HLSL) { - write_output_hlsl(input, module, &info, ¶ms.hlsl); + write_output_hlsl(input, module, &info, ¶ms.hlsl, frag_ep); } } #[cfg(all(feature = "deserialize", feature = "wgsl-out"))] @@ -537,6 +548,7 @@ fn write_output_hlsl( module: &naga::Module, info: &naga::valid::ModuleInfo, options: &naga::back::hlsl::Options, + frag_ep: Option, ) { use naga::back::hlsl; use std::fmt::Write as _; @@ -545,7 +557,7 @@ fn write_output_hlsl( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, options); - let reflection_info = writer.write(module, info).expect("HLSL write failed"); + let reflection_info = writer.write(module, info, frag_ep.as_ref()).expect("HLSL write failed"); input.write_output_file("hlsl", "hlsl", buffer); @@ -786,7 +798,7 @@ fn convert_wgsl() { let input = Input::new(None, name, "wgsl"); let source = input.read_source(); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, None), + Ok(mut module) => check_targets(&input, &mut module, targets, None, None), Err(e) => panic!("{}", e.emit_to_string(&source)), } } @@ -802,13 +814,40 @@ fn convert_wgsl() { let input = Input::new(None, name, "wgsl"); let source = input.read_source(); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source)), + Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source), None), Err(e) => panic!("{}", e.emit_to_string(&source)), } } } } +#[cfg(all(feature = "wgsl-in", feature = "hlsl-out"))] +#[test] +fn unconsumed_vertex_outputs_hlsl_out() { + let load_and_parse = |name| { + // WGSL shaders lives in root dir as a privileged. + let input = Input::new(None, name, "wgsl"); + let source = input.read_source(); + let module = match naga::front::wgsl::parse_str(&source) { + Ok(module) => module, + Err(e) => panic!("{}", e.emit_to_string(&source)), + }; + (input, module) + }; + + // Uses separate wgsl files to make sure the tested code doesn't accidentally rely on + // the fragment entry point being from the same parsed content (e.g. accidentally using the + // wrong `Module` when looking up info). We also don't just create a module from the same file + // twice since everything would probably be stored behind the same keys. + let (input, mut module) = load_and_parse("unconsumed_vertex_outputs_vert"); + let (frag_input, mut frag_module) = load_and_parse("unconsumed_vertex_outputs_frag"); + let frag_ep = naga::back::hlsl::FragmentEntryPoint::new(&frag_module, "fs_main") + .expect("fs_main not found"); + + check_targets(&input, &mut module, Targets::HLSL, None, Some(frag_ep)); + check_targets(&frag_input, &mut frag_module, Targets::HLSL, None, None); +} + #[cfg(feature = "spv-in")] fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { let _ = env_logger::try_init(); @@ -823,7 +862,7 @@ fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { }, ) .unwrap(); - check_targets(&input, &mut module, targets, None); + check_targets(&input, &mut module, targets, None, None); } #[cfg(feature = "spv-in")] @@ -870,7 +909,7 @@ fn convert_glsl_variations_check() { &source, ) .unwrap(); - check_targets(&input, &mut module, Targets::GLSL, None); + check_targets(&input, &mut module, Targets::GLSL, None, None); } #[cfg(feature = "glsl-in")] diff --git a/tests/tests/gpu.rs b/tests/tests/gpu.rs index a5fbcde9da..b343a00541 100644 --- a/tests/tests/gpu.rs +++ b/tests/tests/gpu.rs @@ -1,5 +1,6 @@ mod regression { mod issue_3457; + mod issue_3748; mod issue_4024; mod issue_4122; } diff --git a/tests/tests/regression/issue_3748.rs b/tests/tests/regression/issue_3748.rs new file mode 100644 index 0000000000..77b65cca9a --- /dev/null +++ b/tests/tests/regression/issue_3748.rs @@ -0,0 +1,50 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration}; + +use wgpu::*; + +/// Previously, for every user-defined vertex output a fragment shader had to have a corresponding +/// user-defined input. This would generate `StageError::InputNotComsumed`. +/// +/// This requirement was removed from the WebGPU spec. Now, when generating hlsl, wgpu will +/// automatically remove any user-defined outputs from the vertex shader that are not present in +/// the fragment inputs. This is necessary for generating correct hlsl: +/// https://github.com/gfx-rs/naga/issues/1945 +#[gpu_test] +static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let module = ctx + .device + .create_shader_module(include_wgsl!("issue_3748.wgsl")); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&PipelineLayoutDescriptor { + label: Some("Pipeline Layout"), + bind_group_layouts: &[], + push_constant_ranges: &[], + }); + + ctx.device + .create_render_pipeline(&RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: Some(&pipeline_layout), + vertex: VertexState { + module: &module, + entry_point: "vs_main", + buffers: &[], + }, + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + module: &module, + entry_point: "fs_main", + targets: &[Some(ColorTargetState { + format: TextureFormat::Rgba8Unorm, + blend: None, + write_mask: ColorWrites::all(), + })], + }), + multiview: None, + }); + }); diff --git a/tests/tests/regression/issue_3748.wgsl b/tests/tests/regression/issue_3748.wgsl new file mode 100644 index 0000000000..78ace6d9db --- /dev/null +++ b/tests/tests/regression/issue_3748.wgsl @@ -0,0 +1,23 @@ +struct VertexOut { + @builtin(position) position: vec4, + @location(0) unused_value: f32, + @location(1) value: f32, +} + +struct FragmentIn { + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + @location(1) value: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, 1.0); +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value); +} + + diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index f8ce7d5a6c..25b22d4dd2 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1341,8 +1341,7 @@ impl Device { }) })?; - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let interface = validation::Interface::new(&module, &info, self.limits.clone()); let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info, diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index 1bf318bbaa..7f1af43912 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -122,7 +122,6 @@ struct EntryPoint { #[derive(Debug)] pub struct Interface { limits: wgt::Limits, - features: wgt::Features, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, } @@ -263,8 +262,6 @@ pub enum StageError { #[source] error: InputError, }, - #[error("Location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] - InputNotConsumed { location: wgt::ShaderLocation }, } fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option { @@ -836,12 +833,7 @@ impl Interface { list.push(varying); } - pub fn new( - module: &naga::Module, - info: &naga::valid::ModuleInfo, - limits: wgt::Limits, - features: wgt::Features, - ) -> Self { + pub fn new(module: &naga::Module, info: &naga::valid::ModuleInfo, limits: wgt::Limits) -> Self { let mut resources = naga::Arena::new(); let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { @@ -922,7 +914,6 @@ impl Interface { Self { limits, - features, resources, entry_points, } @@ -1102,6 +1093,11 @@ impl Interface { )); } ( + // TODO: is a subtype allowed here? This isn't clear + // from the line in the spec: "For each user-defined + // input of descriptor.fragment there must be a + // user-defined output of descriptor.vertex that + // location, type, and interpolation of the input." iv.ty.is_subtype_of(&provided.ty), iv.ty.dim.num_components(), ) @@ -1127,35 +1123,23 @@ impl Interface { } } } + // TODO: front_facing, sample_index, and sample_mask builtin's should all increase + // components count for fragment input. Varying::BuiltIn(_) => {} } } - // Check all vertex outputs and make sure the fragment shader consumes them. - // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled. - if shader_stage == naga::ShaderStage::Fragment - && !self - .features - .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT) - { - for &index in inputs.keys() { - // This is a linear scan, but the count should be low enough - // that this should be fine. - let found = entry_point.inputs.iter().any(|v| match *v { - Varying::Local { location, .. } => location == index, - Varying::BuiltIn(_) => false, - }); - - if !found { - return Err(StageError::InputNotConsumed { location: index }); - } - } - } - if shader_stage == naga::ShaderStage::Vertex { + // TODO: if topology is point we should add 1 to inter_stage_components? for output in entry_point.outputs.iter() { //TODO: count builtins towards the limit? inter_stage_components += match *output { + // TODO: Spec mentions "Each user-defined output of descriptor.vertex consumes + // 4 scalar components". Not that it varies based on the type. So is there an + // inconsistency here? Also are all these "user-defined" or is that unknown at + // this stage? + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces + // (same applies to counting components for fragment inputs) Varying::Local { ref iv, .. } => iv.ty.dim.num_components(), Varying::BuiltIn(_) => 0, }; @@ -1177,6 +1161,9 @@ impl Interface { } } + // TODO: spec also has a max_inter_stage_shader_variables + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces and + // the location of user defined outputs(vertex)/inputs(fragment) must all be less than this if inter_stage_components > self.limits.max_inter_stage_shader_components { return Err(StageError::TooManyVaryings { used: inter_stage_components, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 4ad43cc165..e1e54af5f4 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -202,14 +202,26 @@ impl super::Device { Ok(()) } + /// When generating the vertex shader, the fragment stage must be passed if it exists! + /// Otherwise, the generated HLSL may be incorrect since the fragment shader inputs are + /// allowed to be a subset of the vertex outputs. fn load_shader( &self, stage: &crate::ProgrammableStage, layout: &super::PipelineLayout, naga_stage: naga::ShaderStage, + fragment_stage: Option<&crate::ProgrammableStage>, ) -> Result { use naga::back::hlsl; + let frag_ep = fragment_stage + .map(|fs_stage| { + hlsl::FragmentEntryPoint::new(&fs_stage.module.naga.module, fs_stage.entry_point).ok_or( + crate::PipelineError::EntryPoint(naga::ShaderStage::Fragment), + ) + }) + .transpose()?; + let stage_bit = crate::auxil::map_naga_stage(naga_stage); let module = &stage.module.naga.module; //TODO: reuse the writer @@ -218,7 +230,7 @@ impl super::Device { let reflection_info = { profiling::scope!("naga::back::hlsl::write"); writer - .write(module, &stage.module.naga.info) + .write(module, &stage.module.naga.info, frag_ep.as_ref()) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? }; @@ -1266,12 +1278,16 @@ impl crate::Device for super::Device { let (topology_class, topology) = conv::map_topology(desc.primitive.topology); let mut shader_stages = wgt::ShaderStages::VERTEX; - let blob_vs = - self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + naga::ShaderStage::Vertex, + desc.fragment_stage.as_ref(), + )?; let blob_fs = match desc.fragment_stage { Some(ref stage) => { shader_stages |= wgt::ShaderStages::FRAGMENT; - Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) } None => None, }; @@ -1448,7 +1464,7 @@ impl crate::Device for super::Device { &self, desc: &crate::ComputePipelineDescriptor, ) -> Result { - let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?; let pair = { profiling::scope!("ID3D12Device::CreateComputePipelineState"); diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index e8cf3d270d..a3aa5d7627 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -454,7 +454,6 @@ impl super::Adapter { wgt::Features::SHADER_EARLY_DEPTH_TEST, supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), ); - features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); if extensions.contains("GL_ARB_timer_query") { features.set(wgt::Features::TIMESTAMP_QUERY, true); features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES, true); diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 2f48712b9b..25fde48cd1 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -876,7 +876,6 @@ impl super::PrivateCapabilities { features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features } diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index fd62473fd7..faeaf82654 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -523,7 +523,6 @@ impl PhysicalDeviceFeatures { | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, ); features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features.set( F::BGRA8UNORM_STORAGE, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 82989598ef..f3562fe5cb 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -747,15 +747,6 @@ bitflags::bitflags! { /// This is a native only feature. const VERTEX_ATTRIBUTE_64BIT = 1 << 53; - /// Allows vertex shaders to have outputs which are not consumed - /// by the fragment shader. - /// - /// Supported platforms: - /// - Vulkan - /// - Metal - /// - OpenGL - const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 54; - // 54..59 available // Shader: