Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GLES: Cache and reuse programs between similar pipelines #3380

Merged
merged 5 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Additionally `Surface::get_default_config` now returns an Option and returns Non

- Browsers that support `OVR_multiview2` now report the `MULTIVIEW` feature by @expenses in [#3121](https://github.com/gfx-rs/wgpu/pull/3121).
- `Limits::max_push_constant_size` on GLES is now 256 by @Dinnerbone in [#3374](https://github.com/gfx-rs/wgpu/pull/3374).
- Creating multiple pipelines with the same shaders will now be faster, by @Dinnerbone in [#3380](https://github.com/gfx-rs/wgpu/pull/3380).

#### Vulkan

Expand Down
2 changes: 2 additions & 0 deletions wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,8 @@ impl super::Adapter {
features,
shading_language_version,
max_texture_size,
next_shader_id: Default::default(),
program_cache: Default::default(),
}),
},
info: Self::make_info(vendor, renderer),
Expand Down
109 changes: 83 additions & 26 deletions wgpu-hal/src/gles/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use crate::auxil::map_naga_stage;
use glow::HasContext;
use std::{
convert::TryInto,
iter, ptr,
ptr,
sync::{Arc, Mutex},
};

use arrayvec::ArrayVec;
#[cfg(not(target_arch = "wasm32"))]
use std::mem;
use std::sync::atomic::{AtomicU32, Ordering};

type ShaderStage<'a> = (
naga::ShaderStage,
Expand Down Expand Up @@ -265,14 +267,60 @@ impl super::Device {
unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) }
}

unsafe fn create_pipeline<'a, I: Iterator<Item = ShaderStage<'a>>>(
unsafe fn create_pipeline<'a>(
&self,
gl: &glow::Context,
shaders: I,
shaders: ArrayVec<ShaderStage<'a>, 3>,
layout: &super::PipelineLayout,
#[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>,
multiview: Option<std::num::NonZeroU32>,
) -> Result<super::PipelineInner, crate::PipelineError> {
) -> Result<Arc<super::PipelineInner>, crate::PipelineError> {
let mut program_stages = ArrayVec::new();
let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len());
for group in &*layout.group_infos {
group_to_binding_to_slot.push(group.binding_to_slot.clone());
}
for &(naga_stage, stage) in &shaders {
program_stages.push(super::ProgramStage {
naga_stage: naga_stage.to_owned(),
shader_id: stage.module.id,
entry_point: stage.entry_point.to_owned(),
});
}
let glsl_version = match self.shared.shading_language_version {
naga::back::glsl::Version::Embedded { version, .. } => version,
naga::back::glsl::Version::Desktop(_) => unreachable!(),
};
let mut guard = self.shared.program_cache.lock();
Dinnerbone marked this conversation as resolved.
Show resolved Hide resolved
let program = guard
.entry((program_stages, group_to_binding_to_slot.into_boxed_slice()))
.or_insert_with(|| unsafe {
Self::create_program(
gl,
shaders,
layout,
label,
multiview,
glsl_version,
self.shared.private_caps,
)
})
.to_owned()?;
program.ref_count.fetch_add(1, Ordering::Relaxed);
drop(guard);

Ok(program)
}

unsafe fn create_program<'a>(
gl: &glow::Context,
shaders: ArrayVec<ShaderStage<'a>, 3>,
layout: &super::PipelineLayout,
#[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>,
multiview: Option<std::num::NonZeroU32>,
glsl_version: u16,
private_caps: super::PrivateCapabilities,
) -> Result<Arc<super::PipelineInner>, crate::PipelineError> {
let program = unsafe { gl.create_program() }.unwrap();
#[cfg(not(target_arch = "wasm32"))]
if let Some(label) = label {
Expand Down Expand Up @@ -302,11 +350,7 @@ impl super::Device {

// Create empty fragment shader if only vertex shader is present
if has_stages == wgt::ShaderStages::VERTEX {
let version = match self.shared.shading_language_version {
naga::back::glsl::Version::Embedded { version, .. } => version,
naga::back::glsl::Version::Desktop(_) => unreachable!(),
};
let shader_src = format!("#version {} es \n void main(void) {{}}", version,);
let shader_src = format!("#version {} es \n void main(void) {{}}", glsl_version,);
cwfitzgerald marked this conversation as resolved.
Show resolved Hide resolved
log::info!("Only vertex shader is present. Creating an empty fragment shader",);
let shader = unsafe {
Self::compile_shader(
Expand Down Expand Up @@ -339,11 +383,7 @@ impl super::Device {
log::warn!("\tLink: {}", msg);
}

if !self
.shared
.private_caps
.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT)
{
if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) {
// This remapping is only needed if we aren't able to put the binding layout
// in the shader. We can't remap storage buffers this way.
unsafe { gl.use_program(Some(program)) };
Expand Down Expand Up @@ -403,11 +443,12 @@ impl super::Device {
}
}

Ok(super::PipelineInner {
Ok(Arc::new(super::PipelineInner {
program,
sampler_map,
uniforms,
})
ref_count: AtomicU32::new(0),
Dinnerbone marked this conversation as resolved.
Show resolved Hide resolved
}))
}
}

Expand Down Expand Up @@ -1059,6 +1100,7 @@ impl crate::Device<super::Api> for super::Device {
crate::ShaderInput::Naga(naga) => naga,
},
label: desc.label.map(|str| str.to_string()),
id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed),
})
}
unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {}
Expand All @@ -1068,11 +1110,11 @@ impl crate::Device<super::Api> for super::Device {
desc: &crate::RenderPipelineDescriptor<super::Api>,
) -> Result<super::RenderPipeline, crate::PipelineError> {
let gl = &self.shared.context.lock();
let shaders = iter::once((naga::ShaderStage::Vertex, &desc.vertex_stage)).chain(
desc.fragment_stage
.as_ref()
.map(|fs| (naga::ShaderStage::Fragment, fs)),
);
let mut shaders = ArrayVec::new();
shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage));
if let Some(ref fs) = desc.fragment_stage {
shaders.push((naga::ShaderStage::Fragment, fs));
}
let inner =
unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?;

Expand Down Expand Up @@ -1133,23 +1175,38 @@ impl crate::Device<super::Api> for super::Device {
})
}
unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
let mut program_cache = self.shared.program_cache.lock();
if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 {
program_cache.retain(|_, v| match *v {
Ok(ref p) => p.program != pipeline.inner.program,
Err(_) => false,
});
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
}
}

unsafe fn create_compute_pipeline(
&self,
desc: &crate::ComputePipelineDescriptor<super::Api>,
) -> Result<super::ComputePipeline, crate::PipelineError> {
let gl = &self.shared.context.lock();
let shaders = iter::once((naga::ShaderStage::Compute, &desc.stage));
let mut shaders = ArrayVec::new();
shaders.push((naga::ShaderStage::Compute, &desc.stage));
let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?;

Ok(super::ComputePipeline { inner })
}
unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
let mut program_cache = self.shared.program_cache.lock();
if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 {
program_cache.retain(|_, v| match *v {
Ok(ref p) => p.program != pipeline.inner.program,
Err(_) => false,
});
cwfitzgerald marked this conversation as resolved.
Show resolved Hide resolved
let gl = &self.shared.context.lock();
unsafe { gl.delete_program(pipeline.inner.program) };
}
}

#[cfg_attr(target_arch = "wasm32", allow(unused))]
Expand Down
25 changes: 23 additions & 2 deletions wgpu-hal/src/gles/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ use arrayvec::ArrayVec;

use glow::HasContext;

use naga::FastHashMap;
use parking_lot::Mutex;
use std::sync::atomic::AtomicU32;
use std::{fmt, ops::Range, sync::Arc};

#[derive(Clone)]
Expand Down Expand Up @@ -197,6 +200,8 @@ struct AdapterShared {
workarounds: Workarounds,
shading_language_version: naga::back::glsl::Version,
max_texture_size: u32,
next_shader_id: AtomicU32,
program_cache: Mutex<ProgramCache>,
}

pub struct Adapter {
Expand Down Expand Up @@ -405,10 +410,13 @@ pub struct BindGroup {
contents: Box<[RawBinding]>,
}

type ShaderId = u32;

#[derive(Debug)]
pub struct ShaderModule {
naga: crate::NagaShader,
label: Option<String>,
id: ShaderId,
}

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -468,6 +476,7 @@ struct PipelineInner {
program: glow::Program,
sampler_map: SamplerBindMap,
uniforms: [UniformDesc; MAX_PUSH_CONSTANTS],
ref_count: AtomicU32,
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -495,8 +504,20 @@ struct ColorTargetDesc {
blend: Option<BlendDesc>,
}

#[derive(PartialEq, Eq, Hash)]
struct ProgramStage {
naga_stage: naga::ShaderStage,
shader_id: ShaderId,
entry_point: String,
}

type ProgramCache = FastHashMap<
(ArrayVec<ProgramStage, 3>, Box<[Box<[u8]>]>),
Result<Arc<PipelineInner>, crate::PipelineError>,
>;
Dinnerbone marked this conversation as resolved.
Show resolved Hide resolved

pub struct RenderPipeline {
inner: PipelineInner,
inner: Arc<PipelineInner>,
primitive: wgt::PrimitiveState,
vertex_buffers: Box<[VertexBufferDesc]>,
vertex_attributes: Box<[AttributeDesc]>,
Expand All @@ -514,7 +535,7 @@ unsafe impl Send for RenderPipeline {}
unsafe impl Sync for RenderPipeline {}

pub struct ComputePipeline {
inner: PipelineInner,
inner: Arc<PipelineInner>,
}

// SAFE: WASM doesn't have threads
Expand Down