Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose maximum_frame_latency #4899

Merged
merged 14 commits into from
Jan 17, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal`
- Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759)
- GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851)
- wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886)
- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency at the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`; on all other backends except OpenGL it affects the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899)
- DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862)

#### OpenGL
Expand Down
1 change: 1 addition & 0 deletions examples/src/framework.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ impl<E: Example + wgpu::WasmNotSendSync> From<ExampleTestParams<E>>
format,
width: params.width,
height: params.height,
desired_maximum_frame_latency: 2,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: wgpu::CompositeAlphaMode::Auto,
view_formats: vec![format],
Expand Down
13 changes: 3 additions & 10 deletions examples/src/hello_triangle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
multiview: None,
});

let mut config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: swapchain_format,
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: swapchain_capabilities.alpha_modes[0],
view_formats: vec![],
};

let mut config = surface
.get_default_config(&adapter, size.width, size.height)
.unwrap();
surface.configure(&device, &config);

let window = &window;
Expand Down
17 changes: 4 additions & 13 deletions examples/src/hello_windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,11 @@ impl ViewportDesc {

fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport {
let size = self.window.inner_size();

let caps = self.surface.get_capabilities(adapter);
let config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: caps.formats[0],
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: caps.alpha_modes[0],
view_formats: vec![],
};

let config = self
.surface
.get_default_config(adapter, size.width, size.height)
.unwrap();
self.surface.configure(device, &config);

Viewport { desc: self, config }
}
}
Expand Down
12 changes: 3 additions & 9 deletions examples/src/uniform_values/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,9 @@ impl WgpuContext {
multiview: None,
});

let surface_config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: swapchain_format,
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: swapchain_capabilities.alpha_modes[0],
view_formats: vec![],
};
let surface_config = surface
.get_default_config(&adapter, size.width, size.height)
.unwrap();
surface.configure(&device, &surface_config);

// (5)
Expand Down
9 changes: 5 additions & 4 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2000,10 +2000,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}

let num_frames = present::DESIRED_NUM_FRAMES
.clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end());
let maximum_frame_latency = config.desired_maximum_frame_latency.clamp(
*caps.maximum_frame_latency.start(),
*caps.maximum_frame_latency.end(),
);
let mut hal_config = hal::SurfaceConfiguration {
swap_chain_size: num_frames,
maximum_frame_latency,
present_mode: config.present_mode,
composite_alpha_mode: config.alpha_mode,
format: config.format,
Expand Down Expand Up @@ -2074,7 +2076,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
*presentation = Some(present::Presentation {
device: super::any_device::AnyDevice::new(device.clone()),
config: config.clone(),
num_frames,
acquired_texture: None,
});
}
Expand Down
3 changes: 0 additions & 3 deletions wgpu-core/src/present.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,11 @@ use thiserror::Error;
use wgt::SurfaceStatus as Status;

const FRAME_TIMEOUT_MS: u32 = 1000;
pub const DESIRED_NUM_FRAMES: u32 = 3;

#[derive(Debug)]
pub(crate) struct Presentation {
pub(crate) device: AnyDevice,
pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
#[allow(unused)]
pub(crate) num_frames: u32,
pub(crate) acquired_texture: Option<TextureId>,
}

Expand Down
8 changes: 4 additions & 4 deletions wgpu-hal/examples/halmark/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0;
const GRAVITY: f32 = -9.8 * 100.0;
const MAX_VELOCITY: f32 = 750.0;
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_FRAMES: u32 = 3;
const DESIRED_MAX_LATENCY: u32 = 2;

#[repr(C)]
#[derive(Clone, Copy)]
Expand Down Expand Up @@ -132,9 +132,9 @@ impl<A: hal::Api> Example<A> {

let window_size: (u32, u32) = window.inner_size().into();
let surface_config = hal::SurfaceConfiguration {
swap_chain_size: DESIRED_FRAMES.clamp(
*surface_caps.swap_chain_sizes.start(),
*surface_caps.swap_chain_sizes.end(),
maximum_frame_latency: DESIRED_MAX_LATENCY.clamp(
*surface_caps.maximum_frame_latency.start(),
*surface_caps.maximum_frame_latency.end(),
),
present_mode: wgt::PresentMode::Fifo,
composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
Expand Down
8 changes: 4 additions & 4 deletions wgpu-hal/examples/ray-traced-triangle/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::{
use winit::window::WindowButtons;

const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_FRAMES: u32 = 3;
const DESIRED_MAX_LATENCY: u32 = 2;

/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html)
Expand Down Expand Up @@ -264,9 +264,9 @@ impl<A: hal::Api> Example<A> {
*surface_caps.formats.first().unwrap()
};
let surface_config = hal::SurfaceConfiguration {
swap_chain_size: DESIRED_FRAMES
.max(*surface_caps.swap_chain_sizes.start())
.min(*surface_caps.swap_chain_sizes.end()),
maximum_frame_latency: DESIRED_MAX_LATENCY
.max(*surface_caps.maximum_frame_latency.start())
.min(*surface_caps.maximum_frame_latency.end()),
present_mode: wgt::PresentMode::Fifo,
composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
format: surface_format,
Expand Down
4 changes: 2 additions & 2 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -626,8 +626,8 @@ impl crate::Adapter<super::Api> for super::Adapter {
wgt::TextureFormat::Rgb10a2Unorm,
wgt::TextureFormat::Rgba16Float,
],
// we currently use a flip effect which supports 2..=16 buffers
swap_chain_sizes: 2..=16,
// See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency
maximum_frame_latency: 1..=16,
current_extent,
usage: crate::TextureUses::COLOR_TARGET
| crate::TextureUses::COPY_SRC
Expand Down
15 changes: 10 additions & 5 deletions wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,13 +660,18 @@ impl crate::Surface<Api> for Surface {

let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);

// Nvidia recommends to use 1-2 more buffers than the maximum latency
// https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/
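// For high latencies, that many extra buffers seem excessive, so go with a minimum of 3 and beyond that add 1.
// NOTE(review): `.min(3)` below caps the buffer count at 3 rather than enforcing a minimum of 3 - confirm which is intended.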
let swap_chain_buffer = (config.maximum_frame_latency + 1).min(3);
Wumpf marked this conversation as resolved.
Show resolved Hide resolved

let swap_chain = match self.swap_chain.write().take() {
//Note: this path doesn't properly re-initialize all of the things
Some(sc) => {
let raw = unsafe { sc.release_resources() };
let result = unsafe {
raw.ResizeBuffers(
config.swap_chain_size,
swap_chain_buffer,
config.extent.width,
config.extent.height,
non_srgb_format,
Expand All @@ -693,7 +698,7 @@ impl crate::Surface<Api> for Surface {
quality: 0,
},
buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT,
buffer_count: config.swap_chain_size,
buffer_count: swap_chain_buffer,
scaling: d3d12::Scaling::Stretch,
swap_effect: d3d12::SwapEffect::FlipDiscard,
flags,
Expand Down Expand Up @@ -797,11 +802,11 @@ impl crate::Surface<Api> for Surface {
| SurfaceTarget::SwapChainPanel(_) => {}
}

unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) };
unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) };
let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() };

let mut resources = Vec::with_capacity(config.swap_chain_size as usize);
for i in 0..config.swap_chain_size {
let mut resources = Vec::with_capacity(config.maximum_frame_latency as usize);
for i in 0..config.maximum_frame_latency {
let mut resource = d3d12::Resource::null();
unsafe {
swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void())
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
vec![wgt::PresentMode::Fifo] //TODO
},
composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO
swap_chain_sizes: 2..=2,
maximum_frame_latency: 2..=2, //TODO, unused currently
current_extent: None,
usage: crate::TextureUses::COLOR_TARGET,
})
Expand Down
17 changes: 10 additions & 7 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -922,11 +922,14 @@ pub struct SurfaceCapabilities {
/// Must be at least one.
pub formats: Vec<wgt::TextureFormat>,

/// Range for the swap chain sizes.
/// Range for the number of queued frames.
///
/// - `swap_chain_sizes.start` must be at least 1.
/// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`.
pub swap_chain_sizes: RangeInclusive<u32>,
/// Depending on the backend, this either sets the swapchain frame count to `value + 1`, calls `SetMaximumFrameLatency` with the given value,
/// or uses a wait-for-present in the acquire method to limit rendering such that it acts like a swapchain with `value + 1` frames.
///
/// - `maximum_frame_latency.start` must be at least 1.
/// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`.
pub maximum_frame_latency: RangeInclusive<u32>,

/// Current extent of the surface, if known.
pub current_extent: Option<wgt::Extent3d>,
Expand Down Expand Up @@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {

#[derive(Debug, Clone)]
pub struct SurfaceConfiguration {
/// Number of textures in the swap chain. Must be in
/// `SurfaceCapabilities::swap_chain_size` range.
pub swap_chain_size: u32,
/// Maximum number of queued frames. Must be in
/// `SurfaceCapabilities::maximum_frame_latency` range.
pub maximum_frame_latency: u32,
/// Vertical synchronization mode.
pub present_mode: wgt::PresentMode,
/// Alpha composition mode.
Expand Down
11 changes: 6 additions & 5 deletions wgpu-hal/src/metal/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,14 @@ impl crate::Adapter<super::Api> for super::Adapter {
let pc = &self.shared.private_caps;
Some(crate::SurfaceCapabilities {
formats,
//Note: this is hardcoded in `CAMetalLayer` documentation
swap_chain_sizes: if pc.can_set_maximum_drawables_count {
2..=3
// This governs the maximum number of drawables: the drawable count is set to this value + 1.
// See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount
maximum_frame_latency: if pc.can_set_maximum_drawables_count {
1..=2
} else {
// 3 is the default in `CAMetalLayer` documentation
// 3 is the default value for maximum drawables in `CAMetalLayer` documentation
// iOS 10.3 was tested to use 3 on iphone5s
3..=3
2..=2
},
present_modes: if pc.can_set_display_sync {
vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate]
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/metal/surface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl crate::Surface<super::Api> for super::Surface {
}

// this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3)
render_layer.set_maximum_drawable_count(config.swap_chain_size as _);
render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1);
render_layer.set_drawable_size(drawable_size);
if caps.can_set_next_drawable_timeout {
let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false];
Expand Down
6 changes: 5 additions & 1 deletion wgpu-hal/src/vulkan/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1838,7 +1838,11 @@ impl crate::Adapter<super::Api> for super::Adapter {
.collect();
Some(crate::SurfaceCapabilities {
formats,
swap_chain_sizes: caps.min_image_count..=max_image_count,
// TODO: Right now we're always truncating the swap chain
// (presumably - we're actually setting the min image count which isn't necessarily the swap chain size)
// Instead, we should use extensions when available to wait in present.
// See https://github.com/gfx-rs/wgpu/issues/2869
maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`.
current_extent,
usage: conv::map_vk_image_usage(caps.supported_usage_flags),
present_modes: raw_present_modes
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/vulkan/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ impl super::Device {
let mut info = vk::SwapchainCreateInfoKHR::builder()
.flags(raw_flags)
.surface(surface.raw)
.min_image_count(config.swap_chain_size)
.min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869
.image_format(original_format)
.image_color_space(color_space)
.image_extent(vk::Extent2D {
Expand Down
21 changes: 21 additions & 0 deletions wgpu-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5107,6 +5107,26 @@ pub struct SurfaceConfiguration<V> {
/// AutoNoVsync will gracefully do a designed set of fallbacks if their primary modes are
/// unsupported.
pub present_mode: PresentMode,
/// Desired maximum number of frames that the presentation engine should queue in advance.
///
/// This is a hint to the backend implementation and will always be clamped to the supported range.
/// As a consequence, either the maximum frame latency is set directly on the swap chain,
/// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported,
/// or the swap chain size is set to (max-latency + 1).
///
/// Defaults to 2 when created via `wgpu::Surface::get_default_config`.
///
/// Typical values range from 1 to 3, but higher values are possible:
/// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame
/// to be queued up. This typically avoids starving the GPU's work queue.
/// Higher values are useful for achieving a constant flow of frames to the display under varying load.
/// * Choose 1 for low latency from frame recording to frame display.
/// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
/// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
/// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
/// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
/// * A value of 0 is generally not supported and always clamped to a higher value.
pub desired_maximum_frame_latency: u32,
Wumpf marked this conversation as resolved.
Show resolved Hide resolved
/// Specifies how the alpha channel of the textures should be handled during compositing.
pub alpha_mode: CompositeAlphaMode,
/// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture().
Expand All @@ -5126,6 +5146,7 @@ impl<V: Clone> SurfaceConfiguration<V> {
width: self.width,
height: self.height,
present_mode: self.present_mode,
desired_maximum_frame_latency: self.desired_maximum_frame_latency,
alpha_mode: self.alpha_mode,
view_formats: fun(self.view_formats.clone()),
}
Expand Down
1 change: 1 addition & 0 deletions wgpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5102,6 +5102,7 @@ impl Surface<'_> {
format: *caps.formats.get(0)?,
width,
height,
desired_maximum_frame_latency: 2,
present_mode: *caps.present_modes.get(0)?,
alpha_mode: wgt::CompositeAlphaMode::Auto,
view_formats: vec![],
Expand Down