From 6c452f38f49f12bffe0125414415eed2011c342c Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Sat, 14 Jan 2023 01:15:29 +0100 Subject: [PATCH] Cache programs in GLES backend by their stage info, to avoid recreating the same program untold many times --- CHANGELOG.md | 1 + wgpu-hal/src/gles/adapter.rs | 2 + wgpu-hal/src/gles/device.rs | 109 ++++++++++++++++++++++++++--------- wgpu-hal/src/gles/mod.rs | 25 +++++++- 4 files changed, 109 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90f7335877..68b94618c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -135,6 +135,7 @@ Additionally `Surface::get_default_config` now returns an Option and returns Non - Browsers that support `OVR_multiview2` now report the `MULTIVIEW` feature by @expenses in [#3121](https://github.com/gfx-rs/wgpu/pull/3121). - `Limits::max_push_constant_size` on GLES is now 256 by @Dinnerbone in [#3374](https://github.com/gfx-rs/wgpu/pull/3374). +- Creating multiple pipelines with the same shaders will now be faster, by @Dinnerbone in [#3380](https://github.com/gfx-rs/wgpu/pull/3380). #### Vulkan diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 833ae36d2d..ad7cdd475d 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -578,6 +578,8 @@ impl super::Adapter { features, shading_language_version, max_texture_size, + next_shader_id: Default::default(), + program_cache: Default::default(), }), }, info: Self::make_info(vendor, renderer), diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index a4eb09a974..8d7e141f5a 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -3,12 +3,14 @@ use crate::auxil::map_naga_stage; use glow::HasContext; use std::{ convert::TryInto, - iter, ptr, + ptr, sync::{Arc, Mutex}, }; +use arrayvec::ArrayVec; #[cfg(not(target_arch = "wasm32"))] use std::mem; +use std::sync::atomic::{AtomicU32, Ordering}; type ShaderStage<'a> = ( naga::ShaderStage, @@ -265,14 +267,60 @@ impl super::Device { unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) } } - unsafe fn create_pipeline<'a, I: Iterator>>( + unsafe fn create_pipeline<'a>( &self, gl: &glow::Context, - shaders: I, + shaders: ArrayVec, 3>, layout: &super::PipelineLayout, #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, multiview: Option, - ) -> Result { + ) -> Result, crate::PipelineError> { + let mut program_stages = ArrayVec::new(); + let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len()); + for group in &*layout.group_infos { + group_to_binding_to_slot.push(group.binding_to_slot.clone()); + } + for &(naga_stage, stage) in &shaders { + program_stages.push(super::ProgramStage { + naga_stage: naga_stage.to_owned(), + shader_id: stage.module.id, + entry_point: stage.entry_point.to_owned(), + }); + } + let glsl_version = match self.shared.shading_language_version { + naga::back::glsl::Version::Embedded { version, .. } => version, + naga::back::glsl::Version::Desktop(_) => unreachable!(), + }; + let mut guard = self.shared.program_cache.lock(); + let program = guard + .entry((program_stages, group_to_binding_to_slot.into_boxed_slice())) + .or_insert_with(|| unsafe { + Self::create_program( + gl, + shaders, + layout, + label, + multiview, + glsl_version, + self.shared.private_caps, + ) + }) + .to_owned()?; + program.ref_count.fetch_add(1, Ordering::Relaxed); + drop(guard); + + Ok(program) + } + + unsafe fn create_program<'a>( + gl: &glow::Context, + shaders: ArrayVec, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option, + glsl_version: u16, + private_caps: super::PrivateCapabilities, + ) -> Result, crate::PipelineError> { let program = unsafe { gl.create_program() }.unwrap(); #[cfg(not(target_arch = "wasm32"))] if let Some(label) = label { @@ -302,11 +350,7 @@ impl super::Device { // Create empty fragment shader if only vertex shader is present if has_stages == wgt::ShaderStages::VERTEX { - let version = match self.shared.shading_language_version { - naga::back::glsl::Version::Embedded { version, .. } => version, - naga::back::glsl::Version::Desktop(_) => unreachable!(), - }; - let shader_src = format!("#version {} es \n void main(void) {{}}", version,); + let shader_src = format!("#version {} es \n void main(void) {{}}", glsl_version,); log::info!("Only vertex shader is present. Creating an empty fragment shader",); let shader = unsafe { Self::compile_shader( @@ -339,11 +383,7 @@ impl super::Device { log::warn!("\tLink: {}", msg); } - if !self - .shared - .private_caps - .contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) - { + if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) { // This remapping is only needed if we aren't able to put the binding layout // in the shader. We can't remap storage buffers this way. unsafe { gl.use_program(Some(program)) }; @@ -403,11 +443,12 @@ impl super::Device { } } - Ok(super::PipelineInner { + Ok(Arc::new(super::PipelineInner { program, sampler_map, uniforms, - }) + ref_count: AtomicU32::new(0), + })) } } @@ -1059,6 +1100,7 @@ impl crate::Device for super::Device { crate::ShaderInput::Naga(naga) => naga, }, label: desc.label.map(|str| str.to_string()), + id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed), }) } unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} @@ -1068,11 +1110,11 @@ impl crate::Device for super::Device { desc: &crate::RenderPipelineDescriptor, ) -> Result { let gl = &self.shared.context.lock(); - let shaders = iter::once((naga::ShaderStage::Vertex, &desc.vertex_stage)).chain( - desc.fragment_stage - .as_ref() - .map(|fs| (naga::ShaderStage::Fragment, fs)), - ); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage)); + if let Some(ref fs) = desc.fragment_stage { + shaders.push((naga::ShaderStage::Fragment, fs)); + } let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?; @@ -1133,8 +1175,15 @@ impl crate::Device for super::Device { }) } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { - let gl = &self.shared.context.lock(); - unsafe { gl.delete_program(pipeline.inner.program) }; + let mut program_cache = self.shared.program_cache.lock(); + if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } } unsafe fn create_compute_pipeline( @@ -1142,14 +1191,22 @@ impl crate::Device for super::Device { desc: &crate::ComputePipelineDescriptor, ) -> Result { let gl = &self.shared.context.lock(); - let shaders = iter::once((naga::ShaderStage::Compute, &desc.stage)); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Compute, &desc.stage)); let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?; Ok(super::ComputePipeline { inner }) } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { - let gl = &self.shared.context.lock(); - unsafe { gl.delete_program(pipeline.inner.program) }; + let mut program_cache = self.shared.program_cache.lock(); + if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } } #[cfg_attr(target_arch = "wasm32", allow(unused))] diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 6ba7f89e12..b95c84fce7 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -84,6 +84,9 @@ use arrayvec::ArrayVec; use glow::HasContext; +use naga::FastHashMap; +use parking_lot::Mutex; +use std::sync::atomic::AtomicU32; use std::{fmt, ops::Range, sync::Arc}; #[derive(Clone)] @@ -197,6 +200,8 @@ struct AdapterShared { workarounds: Workarounds, shading_language_version: naga::back::glsl::Version, max_texture_size: u32, + next_shader_id: AtomicU32, + program_cache: Mutex, } pub struct Adapter { @@ -405,10 +410,13 @@ pub struct BindGroup { contents: Box<[RawBinding]>, } +type ShaderId = u32; + #[derive(Debug)] pub struct ShaderModule { naga: crate::NagaShader, label: Option, + id: ShaderId, } #[derive(Clone, Debug, Default)] @@ -468,6 +476,7 @@ struct PipelineInner { program: glow::Program, sampler_map: SamplerBindMap, uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], + ref_count: AtomicU32, } #[derive(Clone, Debug)] @@ -495,8 +504,20 @@ struct ColorTargetDesc { blend: Option, } +#[derive(PartialEq, Eq, Hash)] +struct ProgramStage { + naga_stage: naga::ShaderStage, + shader_id: ShaderId, + entry_point: String, +} + +type ProgramCache = FastHashMap< + (ArrayVec, Box<[Box<[u8]>]>), + Result, crate::PipelineError>, +>; + pub struct RenderPipeline { - inner: PipelineInner, + inner: Arc, primitive: wgt::PrimitiveState, vertex_buffers: Box<[VertexBufferDesc]>, vertex_attributes: Box<[AttributeDesc]>, @@ -514,7 +535,7 @@ unsafe impl Send for RenderPipeline {} unsafe impl Sync for RenderPipeline {} pub struct ComputePipeline { - inner: PipelineInner, + inner: Arc, } // SAFE: WASM doesn't have threads