Add Bind Group Creation Benchmark

gfx-rs · Dec 13, 2024 · 5c39447 · 5c39447
1 parent 3cc63af
commit 5c39447
Show file tree

Hide file tree

Showing 4 changed files with 137 additions and 5 deletions.
diff --git a/benches/benches/bind_groups.rs b/benches/benches/bind_groups.rs
@@ -0,0 +1,130 @@
+use std::{
+    num::NonZeroU32,
+    time::{Duration, Instant},
+};
+
+use criterion::{criterion_group, Criterion, Throughput};
+use nanorand::{Rng, WyRand};
+use once_cell::sync::Lazy;
+
+use crate::DeviceState;
+
+struct BindGroupState {
+    device_state: DeviceState,
+    texture_views: Vec<wgpu::TextureView>,
+}
+
+impl BindGroupState {
+    /// Create and prepare all the resources needed for the bind group benchmark.
+    fn new() -> Self {
+        let device_state = DeviceState::new();
+
+        const TEXTURE_COUNT: u32 = 50_000;
+
+        // Performance gets considerably worse if the resources are shuffled.
+        //
+        // Shuffling more closely matches the real-world use case where resources have no
+        // well defined usage order.
+        let mut random = WyRand::new_seed(0x8BADF00D);
+
+        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT as usize);
+        for i in 0..TEXTURE_COUNT {
+            let texture = device_state
+                .device
+                .create_texture(&wgpu::TextureDescriptor {
+                    label: Some(&format!("Texture {i}")),
+                    size: wgpu::Extent3d {
+                        width: 1,
+                        height: 1,
+                        depth_or_array_layers: 1,
+                    },
+                    mip_level_count: 1,
+                    sample_count: 1,
+                    dimension: wgpu::TextureDimension::D2,
+                    format: wgpu::TextureFormat::Rgba8UnormSrgb,
+                    usage: wgpu::TextureUsages::TEXTURE_BINDING,
+                    view_formats: &[],
+                });
+            texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor {
+                label: Some(&format!("Texture View {i}")),
+                ..Default::default()
+            }));
+        }
+        random.shuffle(&mut texture_views);
+
+        Self {
+            device_state,
+            texture_views,
+        }
+    }
+}
+
+fn run_bench(ctx: &mut Criterion) {
+    let state = Lazy::new(BindGroupState::new);
+
+    let mut group = ctx.benchmark_group("Bind Group Creation");
+
+    for count in [5, 50, 500, 5_000, 50_000] {
+        let bind_group_layout =
+            state
+                .device_state
+                .device
+                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                    label: None,
+                    entries: &[wgpu::BindGroupLayoutEntry {
+                        binding: 0,
+                        visibility: wgpu::ShaderStages::FRAGMENT,
+                        ty: wgpu::BindingType::Texture {
+                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
+                            view_dimension: wgpu::TextureViewDimension::D2,
+                            multisampled: false,
+                        },
+                        count: Some(NonZeroU32::new(count).unwrap()),
+                    }],
+                });
+
+        group.throughput(Throughput::Elements(count as u64));
+        group.bench_with_input(
+            format!("{} Element Bind Group", count),
+            &count,
+            |b, &count| {
+                b.iter_custom(|iters| {
+                    let texture_view_refs: Vec<_> =
+                        state.texture_views.iter().take(count as usize).collect();
+
+                    let mut duration = Duration::ZERO;
+                    for _ in 0..iters {
+                        profiling::scope!("benchmark iteration");
+
+                        let start = Instant::now();
+                        let bind_group = state.device_state.device.create_bind_group(
+                            &wgpu::BindGroupDescriptor {
+                                layout: &bind_group_layout,
+                                entries: &[wgpu::BindGroupEntry {
+                                    binding: 0,
+                                    resource: wgpu::BindingResource::TextureViewArray(
+                                        &texture_view_refs,
+                                    ),
+                                }],
+                                label: None,
+                            },
+                        );
+
+                        duration += start.elapsed();
+
+                        drop(bind_group);
+                        state.device_state.device.poll(wgpu::Maintain::Wait);
+                    }
+
+                    duration
+                });
+            },
+        );
+    }
+}
+
+criterion_group! {
+    name = bind_groups;
+    config = Criterion::default().measurement_time(Duration::from_secs(10));
+    targets = run_bench,
+}
diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
@@ -11,9 +11,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use crate::DeviceState;
 
 fn dispatch_count() -> usize {
-    // On CI we only want to run a very lightweight version of the benchmark
+    // When testing we only want to run a very lightweight version of the benchmark
     // to ensure that it does not break.
-    if std::env::var("WGPU_TESTING").is_ok() {
+    if std::env::var("NEXTEST").is_ok() {
         8
     } else {
         10_000
@@ -28,7 +28,7 @@ fn dispatch_count() -> usize {
 fn dispatch_count_bindless() -> usize {
     // On CI we only want to run a very lightweight version of the benchmark
     // to ensure that it does not break.
-    if std::env::var("WGPU_TESTING").is_ok() {
+    if std::env::var("NEXTEST").is_ok() {
         8
     } else {
         1_000

diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
@@ -11,9 +11,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use crate::DeviceState;
 
 fn draw_count() -> usize {
-    // On CI we only want to run a very lightweight version of the benchmark
+    // When testing we only want to run a very lightweight version of the benchmark
     // to ensure that it does not break.
-    if std::env::var("WGPU_TESTING").is_ok() {
+    if std::env::var("NEXTEST").is_ok() {
         8
     } else {
         10_000

diff --git a/benches/benches/root.rs b/benches/benches/root.rs
@@ -1,6 +1,7 @@
 use criterion::criterion_main;
 use pollster::block_on;
 
+mod bind_groups;
 mod computepass;
 mod renderpass;
 mod resource_creation;
@@ -61,6 +62,7 @@ impl DeviceState {
 }
 
 criterion_main!(
+    bind_groups::bind_groups,
     renderpass::renderpass,
     computepass::computepass,
     resource_creation::resource_creation,