Skip to content

Commit

Permalink
GLES: Cache and reuse programs between similar pipelines (#3380)
Browse files Browse the repository at this point in the history
* Cache programs in GLES backend by their stage info, to avoid recreating the same program untold many times

* Don't duplicate an arcs ref count in gles programs

* Extract ProgramCacheKey from gles ProgramCache

* gles: Panic if we can't acquire program_cache lock

* Clarify why the program cache is safe
  • Loading branch information
Dinnerbone authored Jan 20, 2023
1 parent 24a9042 commit c585127
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ let texture = device.create_texture(&wgpu::TextureDescriptor {

- Browsers that support `OVR_multiview2` now report the `MULTIVIEW` feature by @expenses in [#3121](https://github.com/gfx-rs/wgpu/pull/3121).
- `Limits::max_push_constant_size` on GLES is now 256 by @Dinnerbone in [#3374](https://github.com/gfx-rs/wgpu/pull/3374).
- Creating multiple pipelines with the same shaders will now be faster, by @Dinnerbone in [#3380](https://github.com/gfx-rs/wgpu/pull/3380).

#### Vulkan

Expand Down
2 changes: 2 additions & 0 deletions wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,8 @@ impl super::Adapter {
features,
shading_language_version,
max_texture_size,
next_shader_id: Default::default(),
program_cache: Default::default(),
}),
},
info: Self::make_info(vendor, renderer),
Expand Down
124 changes: 98 additions & 26 deletions wgpu-hal/src/gles/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use crate::auxil::map_naga_stage;
use glow::HasContext;
use std::{
convert::TryInto,
iter, ptr,
ptr,
sync::{Arc, Mutex},
};

use arrayvec::ArrayVec;
#[cfg(not(target_arch = "wasm32"))]
use std::mem;
use std::sync::atomic::Ordering;

type ShaderStage<'a> = (
naga::ShaderStage,
Expand Down Expand Up @@ -265,14 +267,68 @@ impl super::Device {
unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) }
}

unsafe fn create_pipeline<'a, I: Iterator<Item = ShaderStage<'a>>>(
unsafe fn create_pipeline<'a>(
&self,
gl: &glow::Context,
shaders: I,
shaders: ArrayVec<ShaderStage<'a>, 3>,
layout: &super::PipelineLayout,
#[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>,
multiview: Option<std::num::NonZeroU32>,
) -> Result<super::PipelineInner, crate::PipelineError> {
) -> Result<Arc<super::PipelineInner>, crate::PipelineError> {
let mut program_stages = ArrayVec::new();
let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len());
for group in &*layout.group_infos {
group_to_binding_to_slot.push(group.binding_to_slot.clone());
}
for &(naga_stage, stage) in &shaders {
program_stages.push(super::ProgramStage {
naga_stage: naga_stage.to_owned(),
shader_id: stage.module.id,
entry_point: stage.entry_point.to_owned(),
});
}
let glsl_version = match self.shared.shading_language_version {
naga::back::glsl::Version::Embedded { version, .. } => version,
naga::back::glsl::Version::Desktop(_) => unreachable!(),
};
let mut guard = self
.shared
.program_cache
.try_lock()
.expect("Couldn't acquire program_cache lock");
// This guard ensures that we can't accidentally destroy a program whilst we're about to reuse it
// The only place that destroys a pipeline is also locking on `program_cache`
let program = guard
.entry(super::ProgramCacheKey {
stages: program_stages,
group_to_binding_to_slot: group_to_binding_to_slot.into_boxed_slice(),
})
.or_insert_with(|| unsafe {
Self::create_program(
gl,
shaders,
layout,
label,
multiview,
glsl_version,
self.shared.private_caps,
)
})
.to_owned()?;
drop(guard);

Ok(program)
}

unsafe fn create_program<'a>(
gl: &glow::Context,
shaders: ArrayVec<ShaderStage<'a>, 3>,
layout: &super::PipelineLayout,
#[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>,
multiview: Option<std::num::NonZeroU32>,
glsl_version: u16,
private_caps: super::PrivateCapabilities,
) -> Result<Arc<super::PipelineInner>, crate::PipelineError> {
let program = unsafe { gl.create_program() }.unwrap();
#[cfg(not(target_arch = "wasm32"))]
if let Some(label) = label {
Expand Down Expand Up @@ -302,11 +358,7 @@ impl super::Device {

// Create empty fragment shader if only vertex shader is present
if has_stages == wgt::ShaderStages::VERTEX {
let version = match self.shared.shading_language_version {
naga::back::glsl::Version::Embedded { version, .. } => version,
naga::back::glsl::Version::Desktop(_) => unreachable!(),
};
let shader_src = format!("#version {} es \n void main(void) {{}}", version,);
let shader_src = format!("#version {} es \n void main(void) {{}}", glsl_version,);
log::info!("Only vertex shader is present. Creating an empty fragment shader",);
let shader = unsafe {
Self::compile_shader(
Expand Down Expand Up @@ -339,11 +391,7 @@ impl super::Device {
log::warn!("\tLink: {}", msg);
}

if !self
.shared
.private_caps
.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT)
{
if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) {
// This remapping is only needed if we aren't able to put the binding layout
// in the shader. We can't remap storage buffers this way.
unsafe { gl.use_program(Some(program)) };
Expand Down Expand Up @@ -403,11 +451,11 @@ impl super::Device {
}
}

Ok(super::PipelineInner {
Ok(Arc::new(super::PipelineInner {
program,
sampler_map,
uniforms,
})
}))
}
}

Expand Down Expand Up @@ -1062,6 +1110,7 @@ impl crate::Device<super::Api> for super::Device {
crate::ShaderInput::Naga(naga) => naga,
},
label: desc.label.map(|str| str.to_string()),
id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed),
})
}
unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {}
Expand All @@ -1071,11 +1120,11 @@ impl crate::Device<super::Api> for super::Device {
desc: &crate::RenderPipelineDescriptor<super::Api>,
) -> Result<super::RenderPipeline, crate::PipelineError> {
let gl = &self.shared.context.lock();
let shaders = iter::once((naga::ShaderStage::Vertex, &desc.vertex_stage)).chain(
desc.fragment_stage
.as_ref()
.map(|fs| (naga::ShaderStage::Fragment, fs)),
);
let mut shaders = ArrayVec::new();
shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage));
if let Some(ref fs) = desc.fragment_stage {
shaders.push((naga::ShaderStage::Fragment, fs));
}
let inner =
unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?;

Expand Down Expand Up @@ -1136,23 +1185,46 @@ impl crate::Device<super::Api> for super::Device {
})
}
unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
let mut program_cache = self.shared.program_cache.lock();
// If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`
// This is safe to assume as long as:
// - `RenderPipeline` can't be cloned
// - The only place that we can get a new reference is during `program_cache.lock()`
if Arc::strong_count(&pipeline.inner) == 2 {
program_cache.retain(|_, v| match *v {
Ok(ref p) => p.program != pipeline.inner.program,
Err(_) => false,
});
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
}
}

unsafe fn create_compute_pipeline(
&self,
desc: &crate::ComputePipelineDescriptor<super::Api>,
) -> Result<super::ComputePipeline, crate::PipelineError> {
let gl = &self.shared.context.lock();
let shaders = iter::once((naga::ShaderStage::Compute, &desc.stage));
let mut shaders = ArrayVec::new();
shaders.push((naga::ShaderStage::Compute, &desc.stage));
let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?;

Ok(super::ComputePipeline { inner })
}
unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
let mut program_cache = self.shared.program_cache.lock();
// If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache``
// This is safe to assume as long as:
// - `ComputePipeline` can't be cloned
// - The only place that we can get a new reference is during `program_cache.lock()`
if Arc::strong_count(&pipeline.inner) == 2 {
program_cache.retain(|_, v| match *v {
Ok(ref p) => p.program != pipeline.inner.program,
Err(_) => false,
});
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
}
}

#[cfg_attr(target_arch = "wasm32", allow(unused))]
Expand Down
27 changes: 25 additions & 2 deletions wgpu-hal/src/gles/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ use arrayvec::ArrayVec;

use glow::HasContext;

use naga::FastHashMap;
use parking_lot::Mutex;
use std::sync::atomic::AtomicU32;
use std::{fmt, ops::Range, sync::Arc};

#[derive(Clone)]
Expand Down Expand Up @@ -197,6 +200,8 @@ struct AdapterShared {
workarounds: Workarounds,
shading_language_version: naga::back::glsl::Version,
max_texture_size: u32,
next_shader_id: AtomicU32,
program_cache: Mutex<ProgramCache>,
}

pub struct Adapter {
Expand Down Expand Up @@ -405,10 +410,13 @@ pub struct BindGroup {
contents: Box<[RawBinding]>,
}

type ShaderId = u32;

#[derive(Debug)]
pub struct ShaderModule {
naga: crate::NagaShader,
label: Option<String>,
id: ShaderId,
}

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -495,8 +503,23 @@ struct ColorTargetDesc {
blend: Option<BlendDesc>,
}

#[derive(PartialEq, Eq, Hash)]
struct ProgramStage {
naga_stage: naga::ShaderStage,
shader_id: ShaderId,
entry_point: String,
}

#[derive(PartialEq, Eq, Hash)]
struct ProgramCacheKey {
stages: ArrayVec<ProgramStage, 3>,
group_to_binding_to_slot: Box<[Box<[u8]>]>,
}

type ProgramCache = FastHashMap<ProgramCacheKey, Result<Arc<PipelineInner>, crate::PipelineError>>;

pub struct RenderPipeline {
inner: PipelineInner,
inner: Arc<PipelineInner>,
primitive: wgt::PrimitiveState,
vertex_buffers: Box<[VertexBufferDesc]>,
vertex_attributes: Box<[AttributeDesc]>,
Expand All @@ -514,7 +537,7 @@ unsafe impl Send for RenderPipeline {}
unsafe impl Sync for RenderPipeline {}

pub struct ComputePipeline {
inner: PipelineInner,
inner: Arc<PipelineInner>,
}

// SAFE: WASM doesn't have threads
Expand Down

0 comments on commit c585127

Please sign in to comment.