diff --git a/.deny.toml b/.deny.toml index f7c233c5d4..29e876c4b9 100644 --- a/.deny.toml +++ b/.deny.toml @@ -19,6 +19,7 @@ allow = [ "CC0-1.0", "ISC", "MIT", + "MIT-0", "MPL-2.0", "Unicode-DFS-2016", "Zlib", diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c0f68eaab..718c940bc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -172,6 +172,18 @@ By @teoxoy in [#4185](https://github.com/gfx-rs/wgpu/pull/4185) - Add an overview of `RenderPass` and how render state works. By @kpreid in [#4055](https://github.com/gfx-rs/wgpu/pull/4055) +### Examples + +- Added the following examples: By @JustAnotherCodemonkey in [#3885](https://github.com/gfx-rs/wgpu/pull/3885). + - repeated-compute + - storage-texture + - render-to-texture + - uniform-values + - hello-workgroups + - hello-synchronization +- Created `wgpu-example::utils` module to contain misc functions and such that are common code but aren't part of the example framework. Add to it the functions `output_image_wasm` and `output_image_native`, both for outputting `Vec` RGBA images either to the disc or the web page. By @JustAnotherCodemonkey in [#3885](https://github.com/gfx-rs/wgpu/pull/3885). +- Removed `capture` example as it had issues (did not run on wasm) and has been replaced by `render-to-texture` (see above). By @JustAnotherCodemonkey in [#3885](https://github.com/gfx-rs/wgpu/pull/3885). + ## v0.17.2 (2023-10-03) ### Bug Fixes @@ -247,6 +259,8 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht ### Added/New Features +#### General + - Empty scissor rects are allowed now, matching the specification. by @PJB3005 in [#3863](https://github.com/gfx-rs/wgpu/pull/3863). - Add back components info to `TextureFormat`s. By @teoxoy in [#3843](https://github.com/gfx-rs/wgpu/pull/3843). - Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (https://github.com/gfx-rs/wgpu/pull/4042). 
diff --git a/Cargo.lock b/Cargo.lock index 1a2765b8bc..a24ec2ac24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -433,6 +433,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "const_panic" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6051f239ecec86fde3410901ab7860d458d160371533842974fc61f96d15879b" + [[package]] name = "convert_case" version = "0.4.0" @@ -819,6 +825,38 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +[[package]] +name = "encase" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fce2eeef77fd4a293a54b62aa00ac9daebfbcda4bf8998c5a815635b004aa1c" +dependencies = [ + "const_panic", + "encase_derive", + "glam", + "thiserror", +] + +[[package]] +name = "encase_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e520cde08cbf4f7cc097f61573ec06ce467019803de8ae82fb2823fa1554a0e" +dependencies = [ + "encase_derive_impl", +] + +[[package]] +name = "encase_derive_impl" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fe2568f851fd6144a45fa91cfed8fe5ca8fc0b56ba6797bfc1ed2771b90e37c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + [[package]] name = "encoding_rs" version = "0.8.33" @@ -1142,8 +1180,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -3229,24 +3269,6 @@ dependencies = [ "winit 0.28.7", ] -[[package]] -name = "wgpu-capture-example" -version = "0.17.0" -dependencies = [ - "bytemuck", - "console_error_panic_hook", - "console_log", - "env_logger", - "futures-intrusive", - "png", - "pollster", - "wasm-bindgen-futures", - "wasm-bindgen-test", - "wgpu", - "wgpu-example", - "winit 0.28.7", -] - [[package]] name = "wgpu-conservative-raster-example" version = "0.17.0" @@ -3391,6 +3413,25 @@ dependencies = [ "wgpu-test", ] +[[package]] +name = "wgpu-hello-synchronization-example" +version = "0.17.0" +dependencies = [ + "bytemuck", + "console_error_panic_hook", + "console_log", + "env_logger", + "futures-intrusive", + "log", + "pollster", + "wasm-bindgen-futures", + "wasm-bindgen-test", + "web-sys", + "wgpu", + "wgpu-example", + "wgpu-test", +] + [[package]] name = "wgpu-hello-triangle-example" version = "0.17.0" @@ -3418,6 +3459,23 @@ dependencies = [ "winit 0.28.7", ] +[[package]] +name = "wgpu-hello-workgroups-example" +version = "0.17.0" +dependencies = [ + "bytemuck", + "console_error_panic_hook", + "console_log", + "env_logger", + "futures-intrusive", + "log", + "pollster", + "wasm-bindgen-futures", + "web-sys", + "wgpu", + "wgpu-example", +] + [[package]] name = "wgpu-info" version = "0.17.0" @@ -3459,6 +3517,46 @@ dependencies = [ "winit 0.28.7", ] +[[package]] +name = "wgpu-render-to-texture-example" +version = "0.17.0" +dependencies = [ + "bytemuck", + "console_error_panic_hook", + "console_log", + "env_logger", + "futures-intrusive", + "log", + "png", + "pollster", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu", + "wgpu-example", + "winit 0.28.7", +] + +[[package]] +name = "wgpu-repeated-compute-example" +version = "0.17.0" +dependencies = [ + "bytemuck", + 
"console_error_panic_hook", + "console_log", + "env_logger", + "futures-intrusive", + "getrandom 0.2.10", + "log", + "pollster", + "wasm-bindgen-futures", + "wasm-bindgen-test", + "web-sys", + "wgpu", + "wgpu-example", + "wgpu-test", +] + [[package]] name = "wgpu-shadow-example" version = "0.17.0" @@ -3500,6 +3598,27 @@ dependencies = [ "winit 0.28.7", ] +[[package]] +name = "wgpu-storage-texture-example" +version = "0.17.0" +dependencies = [ + "bytemuck", + "console_error_panic_hook", + "console_log", + "env_logger", + "futures-intrusive", + "log", + "png", + "pollster", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test", + "web-sys", + "wgpu", + "wgpu-example", + "wgpu-test", +] + [[package]] name = "wgpu-test" version = "0.17.0" @@ -3567,6 +3686,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wgpu-uniform-values-example" +version = "0.17.0" +dependencies = [ + "console_error_panic_hook", + "console_log", + "encase", + "env_logger", + "glam", + "png", + "pollster", + "wasm-bindgen-futures", + "web-sys", + "wgpu", + "winit 0.28.7", +] + [[package]] name = "wgpu-water-example" version = "0.17.0" diff --git a/examples/README.md b/examples/README.md index bf7c9b82a2..50e409e835 100644 --- a/examples/README.md +++ b/examples/README.md @@ -6,49 +6,88 @@ For the simplest examples without using any helping code (see `framework.rs` her - `hello-triangle` for graphics and presentation - `hello-compute` for pure computing -Notably, `capture` example shows rendering without a surface/window. It reads back the contents and saves them to a file. +### Summary of examples -All the examples use [WGSL](https://gpuweb.github.io/gpuweb/wgsl.html) shaders unless specified otherwise. +A summary of the basic examples as split along the graphics and compute "pathways", layed out roughly in order of building on each other. Those further indented and thus more roughly dependant on more other examples tend to be more complicated as well as those further down. It should be noted though that computing examples, even though they are mentioned further down (because rendering to a window is by far the most common use case), tend to be less complex as they require less surrounding context to create and manage a window to render to. -All framework-based examples render to the window and are reftested against the screenshot in the directory. +The rest of the example are for demonstrating specific features that you can come back for later when you know what those features are. + +#### General + +- `hello` - Demonstrates the basics of the WGPU library by getting a default Adapter and debugging it to the screen + +#### Graphics + +- `hello-triangle` - Provides an example of a bare-bones WGPU workflow using the Winit crate that simply renders a red triangle on a green background. + - `uniform-values` - Demonstrates the basics of enabling shaders and the GPU in general to access app state through uniform variables. `uniform-values` also serves as an example of rudimentary app building as the app stores state and takes window-captured keyboard events. The app displays the Mandelbrot Set in grayscale (similar to `storage-texture`) but allows the user to navigate and explore it using their arrow keys and scroll wheel. + - `cube` - Introduces the user to slightly more advanced models. The example creates a set of triangles to form a cube on the CPU and then uses a vertex and index buffer to send the generated model to the GPU for usage in rendering. 
It also uses a texture generated on the CPU to shade the sides of the cube and a uniform variable to apply a transformation matrix to the cube in the shader. + - `bunnymark` - Demonstrates many things, but chief among them is performing numerous draw calls with different bind groups in one render pass. The example also uses textures for the icon and uniform buffers to transfer both global and per-particle state. + - `skybox` - Shows off too many concepts to list here. The name comes from game development where a "skybox" acts as a background for rendering, usually to add a sky texture for immersion, although it can also be used as a backdrop to give the idea of a world beyond the game scene. This example does much more than that, though: it loads a car model from a file and uses the user's mouse to rotate the car model in 3D. `skybox` also makes use of depth textures and similar app patterns to `uniform-values`. + - `shadow` - Likely by far the most complex example (certainly the largest in lines of code) of the official WGPU examples. `shadow` demonstrates basic scene rendering with the main attraction being lighting and shadows (as the name implies). It is recommended that any user looking into lighting be very familiar with the basic concepts of not only rendering with WGPU but also the primary mathematical ideas of computer graphics. +- `render-to-texture` - Renders to an image texture offscreen, demonstrating both off-screen rendering and how to add a sort of resolution-agnostic screenshot feature to an engine. This example either outputs an image file with a name of your choosing (pass command line arguments after specifying a `--` like `cargo run --bin render-to-texture -- "test.png"`) or adds an `img` element containing the image to the page in WASM. + +#### Compute + +- `hello-compute` - Demonstrates the basic workflow for getting arrays of numbers to the GPU, executing a shader on them, and getting the results back (a condensed sketch of this shared upload/dispatch/read-back pattern follows this summary). The operation it performs is finding the Collatz value (how many iterations of the [Collatz equation](https://en.wikipedia.org/wiki/Collatz_conjecture) it takes for the number to either reach 1 or overflow) of a set of numbers and prints the results. + - `repeated-compute` - Mostly for going into detail on subjects `hello-compute` did not. It, too, computes the Collatz conjecture, but this time it automatically loads large arrays of randomly generated numbers, prints them, runs them, and prints the result. It does this cycle 10 times. + - `hello-workgroups` - Teaches the user about the basics of compute workgroups; what they are and what they can do. + - `hello-synchronization` - Teaches the user about synchronization in WGSL, the ability to force all invocations in a workgroup to synchronize with each other before continuing via a sort of barrier. + - `storage-texture` - Demonstrates the use of storage textures as outputs to compute shaders. On the outside, the example seems very similar to `render-to-texture` in that it outputs an image either to the file system or the web page, except it displays a grayscale render of the Mandelbrot Set. Inside, however, the example dispatches a grid of compute workgroups, one for each pixel; each workgroup calculates its pixel's value and stores it to the corresponding pixel of the output storage texture. + +#### Combined + +- `boids` - Demonstrates how to combine compute and render workflows by performing a [boid](https://en.wikipedia.org/wiki/Boids) simulation and rendering the boids to the screen as little triangles.
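To make the shared structure of the compute examples above concrete, here is a condensed, illustrative sketch of the upload/dispatch/read-back pattern they all follow. It is not the code of any single example: the buffer names, the `shader.wgsl` file, and the `main` entry point are placeholders, the pipeline layout is left implicit, and error handling is elided.

```rust
// Hypothetical helper: upload `input`, run a compute shader over it, read the results back.
use wgpu::util::DeviceExt;

async fn run_compute(device: &wgpu::Device, queue: &wgpu::Queue, input: &[u32]) -> Vec<u32> {
    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: None,
        source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))),
    });
    // A storage buffer the shader reads/writes, plus a staging buffer the CPU can map.
    let storage = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: None,
        contents: bytemuck::cast_slice(input),
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
    });
    let staging = device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: storage.size(),
        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
        mapped_at_creation: false,
    });
    let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
        label: None,
        layout: None, // implicit layout, for brevity
        module: &shader,
        entry_point: "main",
    });
    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &pipeline.get_bind_group_layout(0),
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage.as_entire_binding(),
        }],
    });
    let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    {
        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: None,
            timestamp_writes: None,
        });
        pass.set_pipeline(&pipeline);
        pass.set_bind_group(0, &bind_group, &[]);
        pass.dispatch_workgroups(input.len() as u32, 1, 1);
    }
    // Copy the results somewhere mappable, then wait for the GPU and read them back.
    encoder.copy_buffer_to_buffer(&storage, 0, &staging, 0, staging.size());
    queue.submit(Some(encoder.finish()));
    let slice = staging.slice(..);
    let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
    slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
    device.poll(wgpu::Maintain::Wait);
    receiver.receive().await.unwrap().unwrap();
    let result = bytemuck::cast_slice(&slice.get_mapped_range()[..]).to_vec();
    staging.unmap();
    result
}
```

The individual examples differ mainly in what the shader does and how the dispatch is sized; the buffer setup and the map/poll/read-back dance stay essentially the same.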
## Feature matrix -| Feature | boids | bunnymark | cube | mipmap | msaa-line | shadow | skybox | texture-arrays | water | conservative-raster | stencil-triangles | -| ---------------------------- | ------ | --------- | ------ | ------ | --------- | ------ | ------ | -------------- | ------ | ------------------- | ----------------- | -| vertex attributes | :star: | | :star: | | :star: | :star: | :star: | :star: | :star: | | | -| instancing | :star: | | | | | | | | | | | -| lines and points | | | | | :star: | | | | | :star: | | -| dynamic buffer offsets | | :star: | | | | :star: | | | | | | -| implicit layout | | | | :star: | | | | | | | | -| sampled color textures | :star: | :star: | :star: | :star: | | | :star: | :star: | :star: | :star: | | -| storage textures | :star: | | | | | | | | | | | -| comparison samplers | | | | | | :star: | | | | | | -| subresource views | | | | :star: | | :star: | | | | | | -| cubemaps | | | | | | | :star: | | | | | -| multisampling | | | | | :star: | | | | | | | -| off-screen rendering | | | | | | :star: | | | :star: | :star: | | -| stencil testing | | | | | | | | | | | :star: | -| depth testing | | | | | | :star: | :star: | | :star: | | | -| depth biasing | | | | | | :star: | | | | | | -| read-only depth | | | | | | | | | :star: | | | -| blending | | :star: | :star: | | | | | | :star: | | | -| render bundles | | | | | :star: | | | | :star: | | | -| compute passes | :star: | | | | | | | | | | | -| error scopes | | | :star: | | | | | | | | | -| _optional extensions_ | | | | | | | | :star: | | | | -| - SPIR-V shaders | | | | | | | | | | | | -| - binding array | | | | | | | | :star: | | | | -| - push constants | | | | | | | | | | | | -| - depth clamping | | | | | | :star: | | | | | | -| - compressed textures | | | | | | | :star: | | | | | -| - polygon mode | | | :star: | | | | | | | | | -| - queries | | | | :star: | | | | | | | | -| - conservative rasterization | | | | | | | | | | :star: | | -| _integrations_ | | | | | | | | | | | | -| - staging belt | | | | | | | :star: | | | | | -| - typed arena | | | | | | | | | | | | -| - obj loading | | | | | | | :star: | | | | | +| Feature | boids | bunnymark | conservative-raster | cube | hello-synchronization | hello-workgroups | mipmap | msaa-line | render-to-texture | repeated-compute | shadow | skybox | stencil-triangles | storage-texture | texture-arrays | uniform-values | water | +| ---------------------------- | ------ | --------- | ------------------- | ------ | --------------------- | ---------------- | ------ | --------- | ----------------- | ---------------- | ------ | ------ | ----------------- | --------------- | -------------- | -------------- | ------ | +| vertex attributes | :star: | | | :star: | | | | :star: | | | :star: | :star: | | | :star: | | :star: | +| instancing | :star: | | | | | | | | | | | | | | | | | +| lines and points | | | :star: | | | | | :star: | | | | | | | | | | +| dynamic buffer offsets | | :star: | | | | | | | | | :star: | | | | | | | +| implicit layout | | | | | | | :star: | | | | | | | | | | | +| sampled color textures | :star: | :star: | :star: | :star: | | | :star: | | | | | :star: | | | :star: | | :star: | +| storage textures | :star: | | | | | | | | | | | | | :star: | | | | +| comparison samplers | | | | | | | | | | | :star: | | | | | | | +| subresource views | | | | | | | :star: | | | | :star: | | | | | | | +| cubemaps | | | | | | | | | | | | :star: | | | | | | +| multisampling | | | | | | | | :star: | | | | | | | | | | +| off-screen rendering | | | :star: | | | | | | :star: | | 
:star: | | | | | | :star: | +| stencil testing | | | | | | | | | | | | | :star: | | | | | +| depth testing | | | | | | | | | | | :star: | :star: | | | | | :star: | +| depth biasing | | | | | | | | | | | :star: | | | | | | | +| read-only depth | | | | | | | | | | | | | | | | | :star: | +| blending | | :star: | | :star: | | | | | | | | | | | | | :star: | +| render bundles | | | | | | | | :star: | | | | | | | | | :star: | +| uniform buffers | | | | | | | | | | | | | | | | :star: | | +| compute passes | :star: | | | | :star: | :star: | | | | :star: | | | | :star: | | | | +| buffer mapping | | | | | :star: | :star: | | | | :star: | | | | :star: | | | | +| error scopes | | | | :star: | | | | | | | | | | | | | | +| compute workgroups | | | | | :star: | :star: | | | | | | | | | | | | +| compute synchronization | | | | | :star: | | | | | | | | | | | | | +| _optional extensions_ | | | | | | | | | | | | | | | :star: | | | +| - SPIR-V shaders | | | | | | | | | | | | | | | | | | +| - binding array | | | | | | | | | | | | | | | :star: | | | +| - push constants | | | | | | | | | | | | | | | | | | +| - depth clamping | | | | | | | | | | | :star: | | | | | | | +| - compressed textures | | | | | | | | | | | | :star: | | | | | | +| - polygon mode | | | | :star: | | | | | | | | | | | | | | +| - queries | | | | | | | :star: | | | | | | | | | | | +| - conservative rasterization | | | :star: | | | | | | | | | | | | | | | +| _integrations_ | | | | | | | | | | | | | | | | | | +| - staging belt | | | | | | | | | | | | :star: | | | | | | +| - typed arena | | | | | | | | | | | | | | | | | | +| - obj loading | | | | | | | | | | | | :star: | | | | | | + + +## Additional notes + +Note that the examples regarding computing build off of each other; repeated-compute extends hello-compute, hello-workgroups assumes you know the basic workflow of gpu computation, and hello-synchronization assumes you know what a workgroup is. Also note that the computing examples cannot be downleveled to WebGL as WebGL does not allow storage textures. Running these in a browser will require that browser to support WebGPU. + +All the examples use [WGSL](https://gpuweb.github.io/gpuweb/wgsl.html) shaders unless specified otherwise. + +All framework-based examples render to the window and are reftested against the screenshot in the directory. ## Hacking diff --git a/examples/capture/README.md b/examples/capture/README.md deleted file mode 100644 index 2baea436db..0000000000 --- a/examples/capture/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# capture - -This example shows how to capture an image by rendering it to a texture, copying the texture to -a buffer, and retrieving it from the buffer. - -This could be used for "taking a screenshot," with the added benefit that this method doesn't -require a window to be created. - -## To Run - -``` -cargo run --bin capture -open examples/capture/red.png -``` - -## Screenshots - -![Capture example](./screenshot.png) diff --git a/examples/capture/screenshot.png b/examples/capture/screenshot.png deleted file mode 100644 index 4021a58f6e..0000000000 Binary files a/examples/capture/screenshot.png and /dev/null differ diff --git a/examples/capture/src/main.rs b/examples/capture/src/main.rs deleted file mode 100644 index 47a453de6b..0000000000 --- a/examples/capture/src/main.rs +++ /dev/null @@ -1,265 +0,0 @@ -use std::env; -/// This example shows how to capture an image by rendering it to a texture, copying the texture to -/// a buffer, and retrieving it from the buffer. 
This could be used for "taking a screenshot," with -/// the added benefit that this method doesn't require a window to be created. -use std::fs::File; -use std::io::Write; -use std::mem::size_of; -use wgpu::{Buffer, Device, SubmissionIndex}; - -async fn run(png_output_path: &str) { - let args: Vec<_> = env::args().collect(); - let (width, height) = match args.len() { - // 0 on wasm, 1 on desktop - 0 | 1 => (100usize, 200usize), - 3 => (args[1].parse().unwrap(), args[2].parse().unwrap()), - _ => { - println!("Incorrect number of arguments, possible usages:"); - println!("* 0 arguments - uses default width and height of (100, 200)"); - println!("* 2 arguments - uses specified width and height values"); - return; - } - }; - let (device, buffer, buffer_dimensions, submission_index) = - create_red_image_with_dimensions(width, height).await; - create_png( - png_output_path, - device, - buffer, - &buffer_dimensions, - submission_index, - ) - .await; -} - -async fn create_red_image_with_dimensions( - width: usize, - height: usize, -) -> (Device, Buffer, BufferDimensions, SubmissionIndex) { - let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all); - let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { - backends, - dx12_shader_compiler: wgpu::Dx12Compiler::default(), - gles_minor_version: wgpu::Gles3MinorVersion::default(), - }); - let adapter = instance - .request_adapter(&wgpu::RequestAdapterOptions::default()) - .await - .unwrap(); - - let (device, queue) = adapter - .request_device( - &wgpu::DeviceDescriptor { - label: None, - features: wgpu::Features::empty(), - limits: wgpu::Limits::downlevel_defaults(), - }, - None, - ) - .await - .unwrap(); - - // It is a WebGPU requirement that ImageCopyBuffer.layout.bytes_per_row % wgpu::COPY_BYTES_PER_ROW_ALIGNMENT == 0 - // So we calculate padded_bytes_per_row by rounding unpadded_bytes_per_row - // up to the next multiple of wgpu::COPY_BYTES_PER_ROW_ALIGNMENT. 
- // https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding - let buffer_dimensions = BufferDimensions::new(width, height); - // The output buffer lets us retrieve the data as an array - let output_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: None, - size: (buffer_dimensions.padded_bytes_per_row * buffer_dimensions.height) as u64, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - - let texture_extent = wgpu::Extent3d { - width: buffer_dimensions.width as u32, - height: buffer_dimensions.height as u32, - depth_or_array_layers: 1, - }; - - // The render pipeline renders data into this texture - let texture = device.create_texture(&wgpu::TextureDescriptor { - size: texture_extent, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, - usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, - label: None, - view_formats: &[], - }); - - // Set the background to be red - let command_buffer = { - let mut encoder = - device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: None, - color_attachments: &[Some(wgpu::RenderPassColorAttachment { - view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), - resolve_target: None, - ops: wgpu::Operations { - load: wgpu::LoadOp::Clear(wgpu::Color::RED), - store: wgpu::StoreOp::Store, - }, - })], - depth_stencil_attachment: None, - timestamp_writes: None, - occlusion_query_set: None, - }); - - // Copy the data from the texture to the buffer - encoder.copy_texture_to_buffer( - texture.as_image_copy(), - wgpu::ImageCopyBuffer { - buffer: &output_buffer, - layout: wgpu::ImageDataLayout { - offset: 0, - bytes_per_row: Some(buffer_dimensions.padded_bytes_per_row as u32), - rows_per_image: None, - }, - }, - texture_extent, - ); - - encoder.finish() - }; - - let index = queue.submit(Some(command_buffer)); - (device, output_buffer, buffer_dimensions, index) -} - -async fn create_png( - png_output_path: &str, - device: Device, - output_buffer: Buffer, - buffer_dimensions: &BufferDimensions, - submission_index: SubmissionIndex, -) { - // Note that we're not calling `.await` here. - let buffer_slice = output_buffer.slice(..); - // Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished. - let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); - buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); - - // Poll the device in a blocking manner so that our future resolves. - // In an actual application, `device.poll(...)` should - // be called in an event loop or on another thread. - // - // We pass our submission index so we don't need to wait for any other possible submissions. 
- device.poll(wgpu::Maintain::WaitForSubmissionIndex(submission_index)); - - if let Some(Ok(())) = receiver.receive().await { - // If a file system is available, write the buffer as a PNG - let has_file_system_available = cfg!(not(target_arch = "wasm32")); - if !has_file_system_available { - return; - } - let padded_buffer = buffer_slice.get_mapped_range(); - - let mut png_encoder = png::Encoder::new( - File::create(png_output_path).unwrap(), - buffer_dimensions.width as u32, - buffer_dimensions.height as u32, - ); - png_encoder.set_depth(png::BitDepth::Eight); - png_encoder.set_color(png::ColorType::Rgba); - let mut png_writer = png_encoder - .write_header() - .unwrap() - .into_stream_writer_with_size(buffer_dimensions.unpadded_bytes_per_row) - .unwrap(); - - // from the padded_buffer we write just the unpadded bytes into the image - for chunk in padded_buffer.chunks(buffer_dimensions.padded_bytes_per_row) { - png_writer - .write_all(&chunk[..buffer_dimensions.unpadded_bytes_per_row]) - .unwrap(); - } - png_writer.finish().unwrap(); - - // With the current interface, we have to make sure all mapped views are - // dropped before we unmap the buffer. - drop(padded_buffer); - - output_buffer.unmap(); - } -} - -struct BufferDimensions { - width: usize, - height: usize, - unpadded_bytes_per_row: usize, - padded_bytes_per_row: usize, -} - -impl BufferDimensions { - fn new(width: usize, height: usize) -> Self { - let bytes_per_pixel = size_of::(); - let unpadded_bytes_per_row = width * bytes_per_pixel; - let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as usize; - let padded_bytes_per_row_padding = (align - unpadded_bytes_per_row % align) % align; - let padded_bytes_per_row = unpadded_bytes_per_row + padded_bytes_per_row_padding; - Self { - width, - height, - unpadded_bytes_per_row, - padded_bytes_per_row, - } - } -} - -fn main() { - #[cfg(not(target_arch = "wasm32"))] - { - env_logger::init(); - pollster::block_on(run("red.png")); - } - #[cfg(target_arch = "wasm32")] - { - std::panic::set_hook(Box::new(console_error_panic_hook::hook)); - console_log::init().expect("could not initialize logger"); - wasm_bindgen_futures::spawn_local(run("red.png")); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use wgpu::BufferView; - - wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); - - #[test] - // This test never creates a canvas, so will always fail on webgl2. 
- // #[wasm_bindgen_test::wasm_bindgen_test] - fn ensure_generated_data_matches_expected() { - assert_generated_data_matches_expected(); - } - - fn assert_generated_data_matches_expected() { - let (device, output_buffer, dimensions, submission_index) = - pollster::block_on(create_red_image_with_dimensions(100usize, 200usize)); - let buffer_slice = output_buffer.slice(..); - buffer_slice.map_async(wgpu::MapMode::Read, |_| ()); - device.poll(wgpu::Maintain::WaitForSubmissionIndex(submission_index)); - let padded_buffer = buffer_slice.get_mapped_range(); - let expected_buffer_size = dimensions.padded_bytes_per_row * dimensions.height; - assert_eq!(padded_buffer.len(), expected_buffer_size); - assert_that_content_is_all_red(&dimensions, padded_buffer); - } - - fn assert_that_content_is_all_red(dimensions: &BufferDimensions, padded_buffer: BufferView) { - let red = [0xFFu8, 0, 0, 0xFFu8]; - let single_rgba = 4; - padded_buffer - .chunks(dimensions.padded_bytes_per_row) - .map(|padded_buffer_row| &padded_buffer_row[..dimensions.unpadded_bytes_per_row]) - .for_each(|unpadded_row| { - unpadded_row - .chunks(single_rgba) - .for_each(|chunk| assert_eq!(chunk, &red)) - }); - } -} diff --git a/examples/common/src/framework.rs b/examples/common/src/framework.rs index dc373ca876..a897367b9f 100644 --- a/examples/common/src/framework.rs +++ b/examples/common/src/framework.rs @@ -4,6 +4,8 @@ use std::str::FromStr; #[cfg(not(target_arch = "wasm32"))] use std::time::Instant; #[cfg(target_arch = "wasm32")] +use wasm_bindgen::prelude::*; +#[cfg(target_arch = "wasm32")] use web_sys::{ImageBitmapRenderingContext, OffscreenCanvas}; use winit::{ event::{self, WindowEvent}, @@ -122,7 +124,6 @@ async fn setup(title: &str) -> Setup { let mut offscreen_canvas_setup: Option = None; #[cfg(target_arch = "wasm32")] { - use wasm_bindgen::JsCast; use winit::platform::web::WindowExtWebSys; let query_string = web_sys::window().unwrap().location().search().unwrap(); @@ -444,8 +445,6 @@ pub fn run(title: &str) { #[cfg(target_arch = "wasm32")] pub fn run(title: &str) { - use wasm_bindgen::prelude::*; - let title = title.to_owned(); wasm_bindgen_futures::spawn_local(async move { let setup = setup::(&title).await; diff --git a/examples/common/src/lib.rs b/examples/common/src/lib.rs index 0c717499e5..b647b81834 100644 --- a/examples/common/src/lib.rs +++ b/examples/common/src/lib.rs @@ -1 +1,2 @@ pub mod framework; +pub mod utils; diff --git a/examples/common/src/utils.rs b/examples/common/src/utils.rs new file mode 100644 index 0000000000..7b663e2bc3 --- /dev/null +++ b/examples/common/src/utils.rs @@ -0,0 +1,158 @@ +#[cfg(not(target_arch = "wasm32"))] +use std::io::Write; +#[cfg(target_arch = "wasm32")] +use wasm_bindgen::prelude::*; + +/// Replaces the site body with a message telling the user to open the console and use that. +#[cfg(target_arch = "wasm32")] +pub fn add_web_nothing_to_see_msg() { + web_sys::window() + .and_then(|window| window.document()) + .and_then(|document| document.body()) + .expect("Could not get document / body.") + .set_inner_html("

Nothing to see here! Open the console!

"); +} + +/// Outputs a vector of RGBA bytes as a png image with the given dimensions on the given path. +#[cfg(not(target_arch = "wasm32"))] +pub fn output_image_native(image_data: Vec, texture_dims: (usize, usize), path: String) { + let mut png_data = Vec::::with_capacity(image_data.len()); + let mut encoder = png::Encoder::new( + std::io::Cursor::new(&mut png_data), + texture_dims.0 as u32, + texture_dims.1 as u32, + ); + encoder.set_color(png::ColorType::Rgba); + let mut png_writer = encoder.write_header().unwrap(); + png_writer.write_image_data(&image_data[..]).unwrap(); + png_writer.finish().unwrap(); + log::info!("PNG file encoded in memory."); + + let mut file = std::fs::File::create(&path).unwrap(); + file.write_all(&png_data[..]).unwrap(); + log::info!("PNG file written to disc as \"{}\".", path); +} + +/// Effectively a version of `output_image_native` but meant for web browser contexts. +/// +/// This is achieved via in `img` element on the page. If the target image element does +/// not exist, this function creates one. If it does, the image data is overridden. +/// +/// This function makes use of a hidden staging canvas which the data is copied to in +/// order to create a data URL. +#[cfg(target_arch = "wasm32")] +pub fn output_image_wasm(image_data: Vec, texture_dims: (usize, usize)) { + let document = web_sys::window().unwrap().document().unwrap(); + let body = document.body().unwrap(); + + let canvas = if let Some(found_canvas) = document.get_element_by_id("staging-canvas") { + match found_canvas.dyn_into::() { + Ok(canvas_as_canvas) => canvas_as_canvas, + Err(e) => { + log::error!( + "In searching for a staging canvas for outputting an image \ + (element with id \"staging-canvas\"), found non-canvas element: {:?}. + Replacing with standard staging canvas.", + e + ); + e.remove(); + create_staging_canvas(&document) + } + } + } else { + log::info!("Output image staging canvas element not found; creating."); + create_staging_canvas(&document) + }; + // Having the size attributes the right size is so important, we should always do it + // just to be safe. Also, what if we might want the image size to be able to change? + let image_dimension_strings = (texture_dims.0.to_string(), texture_dims.1.to_string()); + canvas + .set_attribute("width", image_dimension_strings.0.as_str()) + .unwrap(); + canvas + .set_attribute("height", image_dimension_strings.1.as_str()) + .unwrap(); + + let context = canvas + .get_context("2d") + .unwrap() + .unwrap() + .dyn_into::() + .unwrap(); + let image_data = web_sys::ImageData::new_with_u8_clamped_array( + wasm_bindgen::Clamped(&image_data), + texture_dims.0 as u32, + ) + .unwrap(); + context.put_image_data(&image_data, 0.0, 0.0).unwrap(); + + // Get the img element that will act as our target for rendering from the canvas. + let image_element = if let Some(found_image_element) = + document.get_element_by_id("output-image-target") + { + match found_image_element.dyn_into::() { + Ok(e) => e, + Err(e) => { + log::error!( + "Found an element with the id \"output-image-target\" but it was not an image: {:?}. + Replacing with default image output element.", + e + ); + e.remove(); + create_output_image_element(&document) + } + } + } else { + log::info!("Output image element not found; creating."); + create_output_image_element(&document) + }; + // The canvas is currently the image we ultimately want. We can create a data url from it now. 
+ let data_url = canvas.to_data_url().unwrap(); + image_element.set_src(&data_url); + log::info!("Copied image from staging canvas to image element."); + + if document.get_element_by_id("image-for-you-text").is_none() { + log::info!("\"Image for you\" text not found; creating."); + let p = document + .create_element("p") + .expect("Failed to create p element for \"image for you text\"."); + p.set_text_content(Some( + "The above image is for you! + You can drag it to your desktop to download.", + )); + p.set_id("image-for-you-text"); + body.append_child(&p) + .expect("Failed to append \"image for you text\" to document body."); + } +} + +#[cfg(target_arch = "wasm32")] +fn create_staging_canvas(document: &web_sys::Document) -> web_sys::HtmlCanvasElement { + let body = document.body().expect("Failed to get document body."); + let new_canvas = document + .create_element("canvas") + .expect("Failed to create staging canvas.") + .dyn_into::() + .unwrap(); + // We don't want to show the canvas, we just want it to exist in the background. + new_canvas.set_attribute("hidden", "true").unwrap(); + new_canvas.set_attribute("background-color", "red").unwrap(); + body.append_child(&new_canvas).unwrap(); + log::info!("Created new staging canvas: {:?}", &new_canvas); + new_canvas +} + +#[cfg(target_arch = "wasm32")] +fn create_output_image_element(document: &web_sys::Document) -> web_sys::HtmlImageElement { + let body = document.body().expect("Failed to get document body."); + let new_image = document + .create_element("img") + .expect("Failed to create output image element.") + .dyn_into::() + .unwrap(); + new_image.set_id("output-image-target"); + body.append_child(&new_image) + .expect("Failed to append output image target to document body."); + log::info!("Created new output target image: {:?}", &new_image); + new_image +} diff --git a/examples/hello-synchronization/Cargo.toml b/examples/hello-synchronization/Cargo.toml new file mode 100644 index 0000000000..fb31c55cad --- /dev/null +++ b/examples/hello-synchronization/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "wgpu-hello-synchronization-example" +version.workspace = true +license.workspace = true +edition.workspace = true +description = "wgpu hello synchronization example" +publish = false + +[[bin]] +name = "hello-synchronization" +path = "src/main.rs" + +[dependencies] +bytemuck.workspace = true +env_logger.workspace = true +futures-intrusive.workspace = true +log.workspace = true +pollster.workspace = true +wgpu.workspace = true +wgpu-example.workspace = true + +[target.'cfg(target_arch = "wasm32")'.dependencies] +console_error_panic_hook.workspace = true +console_log.workspace = true +wasm-bindgen-futures.workspace = true +web-sys = { workspace = true, features = ["Document"] } + +[dev-dependencies] +wasm-bindgen-test.workspace = true +wgpu-test.workspace = true \ No newline at end of file diff --git a/examples/hello-synchronization/README.md b/examples/hello-synchronization/README.md new file mode 100644 index 0000000000..41c7d0d34a --- /dev/null +++ b/examples/hello-synchronization/README.md @@ -0,0 +1,19 @@ +# hello-synchronization + +This example is +1. A small demonstration of the importance of synchronization. +2. How basic synchronization you can understand from the CPU is preformed on the GPU. + +## A Primer on WGSL Synchronization Functions + +The official documentation is a little scattered and sparse. 
The meat of the subject is found [here](https://www.w3.org/TR/2023/WD-WGSL-20230629/#sync-builtin-functions) but there's also a bit on control barriers [here](https://www.w3.org/TR/2023/WD-WGSL-20230629/#control-barrier). The most important part comes from the first link, where the spec says "the affected memory and atomic operations program-ordered before the synchronization function must be visible to all other threads in the workgroup before any affected memory or atomic operation program-ordered after the synchronization function is executed by a member of the workgroup." And from the second, we also get "a control barrier is executed by all invocations in the same workgroup as if it were executed concurrently." + +That's rather vague (by design), so let's break it down with a comparison that should bring those sentences into focus. [Barriers in Rust](https://doc.rust-lang.org/std/sync/struct.Barrier.html#) fit both bills rather nicely (a minimal illustration of this behavior appears at the end of this README). Firstly, Rust barriers are executed as if they were executed concurrently because they are - at least if you define the execution by when it finishes, when [`Barrier::wait`](https://doc.rust-lang.org/std/sync/struct.Barrier.html#method.wait) finally unblocks the thread and execution continues concurrently from there. Rust barriers also fit the first bill: because all affected threads must execute `Barrier::wait` in order for execution to continue, we can guarantee that _all (synchronous)_ operations ordered before the wait call are executed before any operations ordered after the wait call begin execution. Applying this to WGSL, we can think of a barrier in WGSL as a checkpoint that all invocations within each workgroup must reach before the entire workgroup continues with the program together. + +There are two key differences, though. One is that while Rust barriers don't enforce that atomic operations called before the barrier are visible after the barrier, WGSL barriers do. This is incredibly useful and important, and it is demonstrated in this example. + +The other is that WGSL's synchronization functions only affect memory and atomic operations in a certain address space; the guarantee that operations before the function are visible after it applies only to that address space. There are currently three different synchronization functions: +- `storageBarrier` which works in the storage address space and is a simple barrier. +- `workgroupBarrier` which works in the workgroup address space and is a simple barrier. +- `workgroupUniformLoad` which also works in the workgroup address space and is more than just a barrier. +Read up on all three [here](https://www.w3.org/TR/2023/WD-WGSL-20230629/#sync-builtin-functions).
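To make the comparison with Rust's `Barrier` concrete, here is a minimal, illustrative sketch using only the standard library (it is not part of this example's code). Every thread does some work, waits at the barrier, and only then continues, much like invocations reaching `workgroupBarrier()` in `patient_main`:

```rust
use std::sync::{Arc, Barrier};
use std::thread;

fn main() {
    // Four "invocations" in one "workgroup".
    let barrier = Arc::new(Barrier::new(4));
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let barrier = Arc::clone(&barrier);
            thread::spawn(move || {
                // Work ordered before the wait, analogous to the atomicAdd()
                // before workgroupBarrier() in patient_main.
                println!("thread {i}: before the barrier");
                // No thread gets past this line until all four have reached it.
                barrier.wait();
                // Anything after wait() runs only once every thread has finished
                // its pre-barrier work, so that work is observable here.
                println!("thread {i}: after the barrier");
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}
```

The difference, as noted above, is that `workgroupBarrier()` additionally guarantees that writes to `workgroup`-address-space memory made before the barrier are visible to the whole workgroup after it.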
\ No newline at end of file diff --git a/examples/hello-synchronization/src/main.rs b/examples/hello-synchronization/src/main.rs new file mode 100644 index 0000000000..2a8ecac914 --- /dev/null +++ b/examples/hello-synchronization/src/main.rs @@ -0,0 +1,211 @@ +const ARR_SIZE: usize = 128; + +struct ExecuteResults { + patient_workgroup_results: Vec, + hasty_workgroup_results: Vec, +} + +async fn run() { + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let ExecuteResults { + patient_workgroup_results, + hasty_workgroup_results, + } = execute(&device, &queue, ARR_SIZE).await; + + // Print data + log::info!("Patient results: {:?}", patient_workgroup_results); + if !patient_workgroup_results.iter().any(|e| *e != 16) { + log::info!("patient_main was patient."); + } else { + log::error!("patient_main was not patient!"); + } + log::info!("Hasty results: {:?}", hasty_workgroup_results); + if hasty_workgroup_results.iter().any(|e| *e != 16) { + log::info!("hasty_main was not patient."); + } else { + log::info!("hasty_main got lucky."); + } +} + +async fn execute( + device: &wgpu::Device, + queue: &wgpu::Queue, + result_vec_size: usize, +) -> ExecuteResults { + let mut local_patient_workgroup_results = vec![0u32; result_vec_size]; + let mut local_hasty_workgroup_results = local_patient_workgroup_results.clone(); + + let shaders_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shaders.wgsl"))), + }); + + let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: std::mem::size_of_val(local_patient_workgroup_results.as_slice()) as u64, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: std::mem::size_of_val(local_patient_workgroup_results.as_slice()) as u64, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: storage_buffer.as_entire_binding(), + }], + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let patient_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &shaders_module, + entry_point: "patient_main", + }); + let hasty_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: 
&shaders_module, + entry_point: "hasty_main", + }); + + //---------------------------------------------------------- + + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + compute_pass.set_pipeline(&patient_pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + compute_pass.dispatch_workgroups(local_patient_workgroup_results.len() as u32, 1, 1); + } + queue.submit(Some(command_encoder.finish())); + + get_data( + local_patient_workgroup_results.as_mut_slice(), + &storage_buffer, + &output_staging_buffer, + device, + queue, + ) + .await; + + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + compute_pass.set_pipeline(&hasty_pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + compute_pass.dispatch_workgroups(local_patient_workgroup_results.len() as u32, 1, 1); + } + queue.submit(Some(command_encoder.finish())); + + get_data( + local_hasty_workgroup_results.as_mut_slice(), + &storage_buffer, + &output_staging_buffer, + device, + queue, + ) + .await; + + ExecuteResults { + patient_workgroup_results: local_patient_workgroup_results, + hasty_workgroup_results: local_hasty_workgroup_results, + } +} + +async fn get_data( + output: &mut [T], + storage_buffer: &wgpu::Buffer, + staging_buffer: &wgpu::Buffer, + device: &wgpu::Device, + queue: &wgpu::Queue, +) { + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + command_encoder.copy_buffer_to_buffer( + storage_buffer, + 0, + staging_buffer, + 0, + std::mem::size_of_val(output) as u64, + ); + queue.submit(Some(command_encoder.finish())); + let buffer_slice = staging_buffer.slice(..); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.receive().await.unwrap().unwrap(); + output.copy_from_slice(bytemuck::cast_slice(&buffer_slice.get_mapped_range()[..])); + staging_buffer.unmap(); +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .format_timestamp_nanos() + .init(); + pollster::block_on(run()); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init_with_level(log::Level::Info).expect("could not initialize logger"); + + wgpu_example::utils::add_web_nothing_to_see_msg(); + + wasm_bindgen_futures::spawn_local(run()); + } +} + +#[cfg(test)] +mod tests; diff --git a/examples/hello-synchronization/src/shaders.wgsl b/examples/hello-synchronization/src/shaders.wgsl new file mode 100644 index 0000000000..dd3b2dd38f --- /dev/null +++ b/examples/hello-synchronization/src/shaders.wgsl @@ -0,0 +1,30 @@ +@group(0) +@binding(0) +var output: array; + +var count: atomic; + +@compute +@workgroup_size(16) +fn patient_main( + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) workgroup_id: vec3 +) { + atomicAdd(&count, 1u); + workgroupBarrier(); + if (local_id.x == 0u) { + output[workgroup_id.x] = atomicLoad(&count); + } +} + +@compute +@workgroup_size(16) +fn hasty_main( + 
@builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) workgroup_id: vec3 +) { + atomicAdd(&count, 1u); + if (local_id.x == 0u) { + output[workgroup_id.x] = atomicLoad(&count); + } +} \ No newline at end of file diff --git a/examples/hello-synchronization/src/tests.rs b/examples/hello-synchronization/src/tests.rs new file mode 100644 index 0000000000..c626938233 --- /dev/null +++ b/examples/hello-synchronization/src/tests.rs @@ -0,0 +1,23 @@ +use super::*; +use pollster::FutureExt; +use wgpu_test::{initialize_test, TestParameters}; + +wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[wasm_bindgen_test::wasm_bindgen_test] +fn hello_synchronization_test_results() { + initialize_test( + // Taken from hello-compute tests. + TestParameters::default() + .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) + .limits(wgpu::Limits::downlevel_defaults()), + |ctx| { + let ExecuteResults { + patient_workgroup_results, + hasty_workgroup_results: _, + } = execute(&ctx.device, &ctx.queue, ARR_SIZE).block_on(); + assert_eq!(patient_workgroup_results, [16_u32; ARR_SIZE]); + }, + ); +} diff --git a/examples/capture/Cargo.toml b/examples/hello-workgroups/Cargo.toml similarity index 71% rename from examples/capture/Cargo.toml rename to examples/hello-workgroups/Cargo.toml index 87c9b6c38f..20183f6d48 100644 --- a/examples/capture/Cargo.toml +++ b/examples/hello-workgroups/Cargo.toml @@ -1,28 +1,26 @@ [package] -name = "wgpu-capture-example" +name = "wgpu-hello-workgroups-example" version.workspace = true license.workspace = true edition.workspace = true -description = "wgpu capture example" +description = "wgpu hello workgroups example" publish = false [[bin]] -name = "capture" +name = "hello-workgroups" path = "src/main.rs" [dependencies] bytemuck.workspace = true env_logger.workspace = true futures-intrusive.workspace = true +log.workspace = true pollster.workspace = true -png.workspace = true -wasm-bindgen-test.workspace = true -wgpu-example.workspace = true wgpu.workspace = true -winit.workspace = true +wgpu-example.workspace = true [target.'cfg(target_arch = "wasm32")'.dependencies] console_error_panic_hook.workspace = true console_log.workspace = true wasm-bindgen-futures.workspace = true - +web-sys = { workspace = true, features = ["Document"] } diff --git a/examples/hello-workgroups/README.md b/examples/hello-workgroups/README.md new file mode 100644 index 0000000000..5b913af06d --- /dev/null +++ b/examples/hello-workgroups/README.md @@ -0,0 +1,68 @@ +# hello-workgroups + +Now you finally know what that silly little `@workgroup_size(1)` means! + +This example is an extremely bare-bones and arguably somewhat unreasonable demonstration of what workgroup sizes mean in an attempt to explain workgroups in general. + +The example starts with two arrays of numbers. One where `a[i] = i` and the other where `b[i] = 2i`. Both are bound to the shader. The program dispatches a workgroup for each index, each workgroup representing both elements at that index in both arrays. Each invocation in each workgroup works on its respective array and adds 1 to the element there. + +## What are Workgroups? + +### TLDR / Key Takeaways + +- Workgroups fit in a 3d grid of workgroups executed in a single dispatch. +- All invocations in a workgroup are guaranteed to execute concurrently. +- Workgroups carry no other guarantees for concurrency outside of those individual workgroups, meaning... + - No two workgroups can be guaranteed to be executed in parallel. 
+ - No two workgroups can be guaranteed NOT to be executed in parallel. + - No set of workgroups can be guaranteed to execute in any predictable or reliable order in relation to each other. +- The size of a workgroup is defined with the `@workgroup_size` attribute on a compute shader main function. +- The location of an invocation within its workgroup grid can be gotten with `@builtin(local_invocation_id)`. +- The location of an invocation within the entire compute shader grid can be gotten with `@builtin(global_invocation_id)`. +- The location of an invocation's workgroup within the dispatch grid can be gotten with `@builtin(workgroup_id)`. +- Workgroups share memory within the `workgroup` address space. Workgroup memory is similar to private memory but it is shared within a workgroup. Invocations within a workgroup will see the same memory but invocations across workgroups will be accessing different memory. + +### Introduction + +When you call `ComputePass::dispatch_workgroups`, the function dispatches multiple workgroups in a 3d grid defined by the `x`, `y`, and `z` parameters you pass to the function. For example, `dispatch_workgroups(5, 2, 1)` would create a dispatch grid like +|||||| +|---|---|---|---|---| +| W | W | W | W | W | +| W | W | W | W | W | + +Where each W is a workgroup. If you want your shader to consider what workgroup within the dispatch the current invocation is in, add a function argument with type `vec3<u32>` and with the attribute `@builtin(workgroup_id)`. + +Note that in this example, the term "dispatch grid" is used throughout to mean the grid of workgroups within a dispatch, but it is not an official term within WGSL. Terms that are official are "workgroup grid", which refers to the invocations in a single _workgroup_, and "compute shader grid", which refers to the grid of _all_ the invocations in the _entire dispatch_. + +### Within the Workgroup + +With hello-compute and repeated-compute, we used a workgroup size of `(1)`, or rather (1, 1, 1), so each workgroup dispatched from `dispatch_workgroups` made a single invocation, but this isn't always the case. Each workgroup represents its own little grid of individual invocations tied together. This could be just one or practically any number in a 3d grid of invocations. The grid size of each workgroup, and thus the number of invocations called per workgroup, is determined by the `@workgroup_size` attribute you've seen in other compute shaders. To get the current invocation's location within a workgroup, add a `vec3<u32>` argument to the main function with the attribute `@builtin(local_invocation_id)`. We'll look at the compute shader grid of a dispatch of size (2, 2, 1) with workgroup sizes of (2, 2, 1) as well. Let `w` be the `workgroup_id` and `i` be the `local_invocation_id`. + +||||| +|------------------------|------------------------|------------------------|------------------------| +| w(0, 0, 0), i(0, 0, 0) | w(0, 0, 0), i(1, 0, 0) | w(1, 0, 0), i(0, 0, 0) | w(1, 0, 0), i(1, 0, 0) | +| w(0, 0, 0), i(0, 1, 0) | w(0, 0, 0), i(1, 1, 0) | w(1, 0, 0), i(0, 1, 0) | w(1, 0, 0), i(1, 1, 0) | +| w(0, 1, 0), i(0, 0, 0) | w(0, 1, 0), i(1, 0, 0) | w(1, 1, 0), i(0, 0, 0) | w(1, 1, 0), i(1, 0, 0) | +| w(0, 1, 0), i(0, 1, 0) | w(0, 1, 0), i(1, 1, 0) | w(1, 1, 0), i(0, 1, 0) | w(1, 1, 0), i(1, 1, 0) | + +### Execution of Workgroups + +As stated before, workgroups are groups of invocations. The invocations within a workgroup are always guaranteed to execute concurrently. That said, the guarantees basically stop there. 
You cannot get any guarantee as to when any given workgroup will execute, including in relation to other workgroups. You can't guarantee that any two workgroups will execute together nor can you guarantee that they will _not_ execute together. Of the workgroups that don't execute together, you additionally cannot guarantee that they will execute in any particular order. When your function runs in an invocation, you know that it will be working together with its workgroup buddies and that's basically it. + +See [the WGSL spec on compute shader execution](https://www.w3.org/TR/2023/WD-WGSL-20230629/#compute-shader-workgroups) for more details. + +### Workgroups and their Invocations in a Global Scope + +As mentioned above, invocations exist both within the context of a workgroup grid as well as a compute shader grid which is a grid, divided into workgroup sections, of invocations that represents the whole of the dispatch. Similar to how `@builtin(local_invocation_id)` gets you the place of the invocation within the workgroup grid, `@builtin(global_invocation_id)` gets you the place of the invocation within the entire compute shader grid. Slight trivia: you might imagine that this is computed from `local_invocation_id` and `workgroup_id` but it's actually the opposite. Everything operates on the compute shader grid, the workgroups are imagined sectors within the compute shader grid, and `local_invocation_id` and `workgroup_id` are calculated based on global id and known workgroup size. Yes, we live in a matrix... of compute shader invocations. This isn't super useful information but it can help fit things into a larger picture. + +## Barriers and Workgroups + +Arguably, workgroups are at their most useful when being used alongside barriers. Since barriers are already explained more thoroughly in the hello-synchronization example, this section will be short. Despite affecting different memory address spaces, all synchronization functions affect invocations on a workgroup level, synchronizing the workgroup. See [hello-synchronization/README.md](../hello-synchronization/README.md) for more. + +## Links to Technical Resources + +For a rather long explainer, this README may still leave the more technically minded person with questions. The specifications for both WebGPU and WGSL ("WebGPU Shading Language") are long and it's rather unintuitive that by far the vast majority of specification on how workgroups and compute shaders more generally work, is all in the WGSL spec. Below are some links into the specifications at a couple interesting points: + +- [Here](https://www.w3.org/TR/WGSL/#compute-shader-workgroups) is the main section on workgroups and outlines important terminology in technical terms. It is recommended that everyone looking for something in this section of this README start by reading this. +- [Here](https://www.w3.org/TR/webgpu/#computing-operations) is a section on compute shaders from a WebGPU perspective (instead of WGSL). It's still a stub but hopefully it will grow in the future. +- Don't forget your [`@builtin()`'s](https://www.w3.org/TR/WGSL/#builtin-inputs-outputs)! \ No newline at end of file diff --git a/examples/hello-workgroups/src/main.rs b/examples/hello-workgroups/src/main.rs new file mode 100644 index 0000000000..53e8b2adca --- /dev/null +++ b/examples/hello-workgroups/src/main.rs @@ -0,0 +1,199 @@ +//! This example assumes that you've seen hello-compute and or repeated-compute +//! and thus have a general understanding of what's going on here. +//! +//! 
There's an explainer on what this example does exactly and what workgroups +//! are and the meaning of `@workgroup(size_x, size_y, size_z)` in the +//! README. Also see commenting in shader.wgsl as well. +//! +//! Only parts specific to this example will be commented. + +use wgpu::util::DeviceExt; + +async fn run() { + let mut local_a = [0i32; 100]; + for (i, e) in local_a.iter_mut().enumerate() { + *e = i as i32; + } + log::info!("Input a: {local_a:?}"); + let mut local_b = [0i32; 100]; + for (i, e) in local_b.iter_mut().enumerate() { + *e = i as i32 * 2; + } + log::info!("Input b: {local_b:?}"); + + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))), + }); + + let storage_buffer_a = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: bytemuck::cast_slice(&local_a[..]), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + }); + let storage_buffer_b = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: bytemuck::cast_slice(&local_b[..]), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + }); + let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: std::mem::size_of_val(&local_a) as u64, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: storage_buffer_a.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: storage_buffer_b.as_entire_binding(), + }, + ], + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &shader, + entry_point: "main", + }); + + //---------------------------------------------------------- + + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + 
compute_pass.set_pipeline(&pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + /* Note that since each workgroup will cover both arrays, we only need to + cover the length of one array. */ + compute_pass.dispatch_workgroups(local_a.len() as u32, 1, 1); + } + queue.submit(Some(command_encoder.finish())); + + //---------------------------------------------------------- + + get_data( + &mut local_a[..], + &storage_buffer_a, + &output_staging_buffer, + &device, + &queue, + ) + .await; + get_data( + &mut local_b[..], + &storage_buffer_b, + &output_staging_buffer, + &device, + &queue, + ) + .await; + + log::info!("Output in A: {local_a:?}"); + log::info!("Output in B: {local_b:?}"); +} + +async fn get_data( + output: &mut [T], + storage_buffer: &wgpu::Buffer, + staging_buffer: &wgpu::Buffer, + device: &wgpu::Device, + queue: &wgpu::Queue, +) { + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + command_encoder.copy_buffer_to_buffer( + storage_buffer, + 0, + staging_buffer, + 0, + std::mem::size_of_val(output) as u64, + ); + queue.submit(Some(command_encoder.finish())); + let buffer_slice = staging_buffer.slice(..); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.receive().await.unwrap().unwrap(); + output.copy_from_slice(bytemuck::cast_slice(&buffer_slice.get_mapped_range()[..])); + staging_buffer.unmap(); +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .format_timestamp_nanos() + .init(); + pollster::block_on(run()); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init_with_level(log::Level::Info).expect("could not initialize logger"); + + wgpu_example::utils::add_web_nothing_to_see_msg(); + + wasm_bindgen_futures::spawn_local(run()); + } +} diff --git a/examples/hello-workgroups/src/shader.wgsl b/examples/hello-workgroups/src/shader.wgsl new file mode 100644 index 0000000000..c1676663ab --- /dev/null +++ b/examples/hello-workgroups/src/shader.wgsl @@ -0,0 +1,23 @@ +// This is useful because we can't use, say, vec2> because +// of array being unsized. Normally we would interweave them or use +// and array of structs but this is just for the sake of demonstration. 
+ +@group(0) +@binding(0) +var<storage, read_write> a: array<i32>; + +@group(0) +@binding(1) +var<storage, read_write> b: array<i32>; + +@compute +@workgroup_size(2, 1, 1) +fn main(@builtin(local_invocation_id) lid: vec3<u32>, @builtin(workgroup_id) wid: vec3<u32>) { + if lid.x == 0u { + // Do computation (use your imagination) + a[wid.x] += 1; + } else if lid.x == 1u { + // Do computation + b[wid.x] += 1; + } +} \ No newline at end of file diff --git a/examples/render-to-texture/Cargo.toml b/examples/render-to-texture/Cargo.toml new file mode 100644 index 0000000000..042467de03 --- /dev/null +++ b/examples/render-to-texture/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "wgpu-render-to-texture-example" +version.workspace = true +license.workspace = true +edition.workspace = true +description = "wgpu render to texture example" +publish = false + +[[bin]] +name = "render-to-texture" +path = "src/main.rs" + +[dependencies] +bytemuck.workspace = true +env_logger.workspace = true +futures-intrusive.workspace = true +log.workspace = true +pollster.workspace = true +wgpu.workspace = true +wgpu-example.workspace = true +winit.workspace = true + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +png.workspace = true + +[target.'cfg(target_arch = "wasm32")'.dependencies] +console_error_panic_hook.workspace = true +console_log.workspace = true +wasm-bindgen.workspace = true +wasm-bindgen-futures.workspace = true +web-sys = { workspace = true, features = [ + "HtmlCanvasElement", "Document", "CanvasRenderingContext2d", "Window", "ImageData", + "HtmlImageElement" +] } diff --git a/examples/render-to-texture/README.md b/examples/render-to-texture/README.md new file mode 100644 index 0000000000..dbc1943234 --- /dev/null +++ b/examples/render-to-texture/README.md @@ -0,0 +1,5 @@ +# render-to-texture + +Similar to hello-triangle, but instead of rendering to a window or canvas, this example renders to a texture that is then output as an image, like the storage-texture example. + +If all goes well, the end result should look familiar: hello-triangle's red triangle on a green background. \ No newline at end of file diff --git a/examples/render-to-texture/src/main.rs b/examples/render-to-texture/src/main.rs new file mode 100644 index 0000000000..cc0a191132 --- /dev/null +++ b/examples/render-to-texture/src/main.rs @@ -0,0 +1,170 @@ +#[cfg(not(target_arch = "wasm32"))] +use wgpu_example::utils::output_image_native; +#[cfg(target_arch = "wasm32")] +use wgpu_example::utils::output_image_wasm; + +const TEXTURE_DIMS: (usize, usize) = (512, 512); + +async fn run(_path: Option<String>) { + // This will later store the raw pixel value data locally. We'll create it now as + // a convenient size reference. 
+ let mut texture_data = Vec::::with_capacity(TEXTURE_DIMS.0 * TEXTURE_DIMS.1 * 4); + + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))), + }); + + let render_target = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: TEXTURE_DIMS.0 as u32, + height: TEXTURE_DIMS.1 as u32, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, + view_formats: &[wgpu::TextureFormat::Rgba8UnormSrgb], + }); + let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: texture_data.capacity() as u64, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: None, + layout: None, + vertex: wgpu::VertexState { + module: &shader, + entry_point: "vs_main", + buffers: &[], + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: "fs_main", + targets: &[Some(wgpu::TextureFormat::Rgba8UnormSrgb.into())], + }), + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + }); + + log::info!("Wgpu context set up."); + + //----------------------------------------------- + + let texture_view = render_target.create_view(&wgpu::TextureViewDescriptor::default()); + + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + { + let mut render_pass = command_encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + render_pass.set_pipeline(&pipeline); + render_pass.draw(0..3, 0..1); + } + // The texture now contains our rendered image + command_encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + texture: &render_target, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::ImageCopyBuffer { + buffer: &output_staging_buffer, + layout: wgpu::ImageDataLayout { + offset: 0, + // This needs to be a multiple of 256. Normally we would need to pad + // it but we here know it will work out anyways. + bytes_per_row: Some((TEXTURE_DIMS.0 * 4) as u32), + rows_per_image: Some(TEXTURE_DIMS.1 as u32), + }, + }, + wgpu::Extent3d { + width: TEXTURE_DIMS.0 as u32, + height: TEXTURE_DIMS.1 as u32, + depth_or_array_layers: 1, + }, + ); + queue.submit(Some(command_encoder.finish())); + log::info!("Commands submitted."); + + //----------------------------------------------- + + // Time to get our image. 
+ let buffer_slice = output_staging_buffer.slice(..); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.receive().await.unwrap().unwrap(); + log::info!("Output buffer mapped."); + { + let view = buffer_slice.get_mapped_range(); + texture_data.extend_from_slice(&view[..]); + } + log::info!("Image data copied to local."); + output_staging_buffer.unmap(); + + #[cfg(not(target_arch = "wasm32"))] + output_image_native(texture_data.to_vec(), TEXTURE_DIMS, _path.unwrap()); + #[cfg(target_arch = "wasm32")] + output_image_wasm(texture_data.to_vec(), TEXTURE_DIMS); + log::info!("Done."); +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .format_timestamp_nanos() + .init(); + + let path = std::env::args() + .nth(1) + .unwrap_or_else(|| "please_don't_git_push_me.png".to_string()); + pollster::block_on(run(Some(path))); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init_with_level(log::Level::Info).expect("could not initialize logger"); + wasm_bindgen_futures::spawn_local(run(None)); + } +} diff --git a/examples/render-to-texture/src/shader.wgsl b/examples/render-to-texture/src/shader.wgsl new file mode 100644 index 0000000000..f7131a1be1 --- /dev/null +++ b/examples/render-to-texture/src/shader.wgsl @@ -0,0 +1,14 @@ +@vertex +fn vs_main(@builtin(vertex_index) in_vertex_index: u32) -> @builtin(position) vec4 { + var vertices = array, 3>( + vec4(0.0, 1.0, 0.0, 1.0), + vec4(-1.0, -1.0, 0.0, 1.0), + vec4(1.0, -1.0, 0.0, 1.0) + ); + return vertices[in_vertex_index]; +} + +@fragment +fn fs_main() -> @location(0) vec4 { + return vec4(1.0, 0.0, 0.0, 1.0); +} diff --git a/examples/repeated-compute/Cargo.toml b/examples/repeated-compute/Cargo.toml new file mode 100644 index 0000000000..6df3820e7e --- /dev/null +++ b/examples/repeated-compute/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "wgpu-repeated-compute-example" +version.workspace = true +license.workspace = true +edition.workspace = true +description = "wgpu repeated compute example" +publish = false + +[[bin]] +name = "repeated-compute" +path = "src/main.rs" + +[dependencies] +bytemuck.workspace = true +env_logger.workspace = true +futures-intrusive.workspace = true +getrandom = { version = "0.2.10", features = ["js"] } +log.workspace = true +pollster.workspace = true +wgpu.workspace = true +wgpu-example.workspace = true + +[target.'cfg(target_arch = "wasm32")'.dependencies] +console_error_panic_hook.workspace = true +console_log.workspace = true +wasm-bindgen-futures.workspace = true +web-sys = { workspace = true, features = ["Document"] } + +[dev-dependencies] +wasm-bindgen-test.workspace = true +wgpu-test.workspace = true diff --git a/examples/repeated-compute/README.md b/examples/repeated-compute/README.md new file mode 100644 index 0000000000..26e247c17e --- /dev/null +++ b/examples/repeated-compute/README.md @@ -0,0 +1,15 @@ +# repeated-compute + +Repeatedly performs the Collatz calculation used in `hello-compute` on sets of random numbers. 
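For reference, the recurrence that shader.wgsl applies to each element can be sketched on the CPU like this (an illustrative sketch only; the helper below is not part of the example, but the overflow sentinel mirrors the shader's `OVERFLOW` value):

```rust
// CPU-side sketch of the per-element work done by shader.wgsl.
// Returns 0xffffffff as an overflow sentinel, matching the shader.
fn collatz_iterations(mut n: u32) -> u32 {
    let mut i = 0u32;
    while n > 1 {
        if n % 2 == 0 {
            n /= 2;
        } else {
            // 3n + 1 would overflow a u32 beyond this bound.
            if n >= 0x5555_5555 {
                return 0xffff_ffff;
            }
            n = 3 * n + 1;
        }
        i += 1;
    }
    i
}
```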
+ +## Sample + +Randomly generated input: +``` +[61917, 53957, 5717, 40520, 41020, 5120, 44281, 19584, 2975, 5310, 4162, 38159, 25343, 16551, 40532, 31464, 64505, 55815, 34793, 24285, 62190, 10530, 49321, 57494, 18473, 18291, 9067, 2665, 53877, 6754, 37616, 51136, 54990, 31159, 38648, 24127, 49640, 12095, 4529, 56275, 18200, 24423, 14065, 17512, 31421, 19612, 63709, 47666, 21805, 13608, 63529, 17809, 6737, 55362, 24647, 30348, 44906, 46325, 503, 52776, 63112, 20785, 63338, 28904, 55772, 56851, 53870, 65503, 30290, 57374, 61244, 39866, 625, 2353, 54901, 25511, 64046, 47882, 22723, 54917, 19563, 24130, 54374, 41964, 3999, 2805, 918, 32932, 6717, 46311, 4818, 28843, 37972, 50981, 31555, 39064, 42814, 37957, 17963, 22678, 3048, 18823, 7293, 63312, 29086, 45580, 5347, 1761, 19090, 41520, 35919, 38705, 51378, 29090, 31100, 55324, 26807, 26017, 24295, 62389, 51934, 27026, 1795, 14965, 51274, 10875, 21396, 22828, 37077, 49922, 46486, 55817, 58928, 64455, 47269, 53484, 6602, 52270, 24417, 6525, 60485, 6389, 10336, 62651, 15721, 8793, 37174, 11962, 768, 21426, 9919, 14295, 55401, 33099, 2221, 9021, 793, 27731, 58923, 28847, 56634, 20447, 33108, 11355, 32437, 15594, 26951, 62607, 28151, 46173, 53140, 48397, 64164, 12279, 54591, 36440, 42712, 3495, 28316, 4674, 35028, 50809, 17289, 3355, 6840, 38134, 29806, 53215, 12076, 55685, 31314, 33548, 51846, 29484, 36845, 12242, 11836, 5449, 11549, 12626, 23699, 52777, 350, 19344, 6380, 63964, 49649, 42487, 26543, 60198, 43868, 38280, 12917, 33574, 44104, 24176, 1348, 47752, 34890, 1471, 34329, 59348, 25115, 148, 62147, 12340, 23654, 26821, 3695, 41075, 15125, 56593, 44273, 34180, 35209, 26294, 48642, 19007, 40617, 46831, 9988, 522, 36478, 64700, 31220, 41376, 43870, 6053, 56665, 56475, 475, 60238, 38170, 53613, 23654, 26273] +``` + +Resulting output: +``` +["148", "78", "36", "75", "150", "15", "163", "43", "48", "54", "64", "80", "201", "120", "36", "147", "192", "65", "129", "157", "60", "42", "189", "73", "92", "66", "47", "53", "91", "36", "62", "78", "215", "54", "124", "144", "158", "94", "64", "83", "22", "100", "58", "35", "85", "105", "254", "101", "56", "63", "78", "97", "181", "228", "219", "72", "132", "57", "66", "34", "104", "149", "148", "121", "60", "104", "91", "130", "165", "78", "86", "106", "25", "32", "122", "113", "47", "96", "82", "60", "79", "51", "184", "88", "188", "84", "129", "147", "88", "114", "121", "165", "80", "83", "103", "75", "194", "155", "48", "131", "110", "61", "163", "55", "165", "70", "116", "104", "79", "106", "93", "75", "52", "134", "54", "91", "108", "126", "188", "148", "109", "38", "68", "133", "127", "117", "48", "30", "36", "52", "114", "184", "135", "161", "83", "52", "137", "109", "69", "137", "86", "124", "104", "179", "84", "127", "62", "50", "15", "30", "148", "102", "78", "160", "32", "140", "77", "90", "135", "165", "104", "180", "129", "161", "160", "146", "183", "148", "108", "145", "109", "70", "104", "125", "78", "62", "49", "56", "103", "59", "36", "202", "110", "92", "57", "54", "165", "171", "68", "109", "85", "67", "171", "46", "124", "174", "99", "160", "130", "156", "100", "83", "81", "61", "75", "55", "158", "101", "77", "91", "119", "75", "76", "129", "101", "95", "114", "96", "142", "171", "111", "122", "64", "23", "179", "37", "82", "46", "206", "150", "40", "104", "101", "129", "155", "64", "65", "154", "212", "132", "91", "30", "67", "148", "178", "106", "163", "67", "60", "135", "27", "117", "106", "109", "82", "201"] +``` \ No newline at end of file diff --git 
a/examples/repeated-compute/src/main.rs b/examples/repeated-compute/src/main.rs new file mode 100644 index 0000000000..4795756e93 --- /dev/null +++ b/examples/repeated-compute/src/main.rs @@ -0,0 +1,256 @@ +//! See hello-compute example main.rs for more details +//! as similar items here are not explained. +//! +//! This example does elaborate on some things though that the +//! hello-compute example does not such as mapping buffers +//! and why use the async channels. + +use std::mem::size_of_val; + +const OVERFLOW: u32 = 0xffffffff; + +async fn run() { + let mut numbers = [0u32; 256]; + let context = WgpuContext::new(size_of_val(&numbers)).await; + + for _ in 0..10 { + for p in numbers.iter_mut() { + *p = generate_rand() as u32; + } + + compute(&mut numbers, &context).await; + + let printed_numbers = numbers + .iter() + .map(|n| match n { + &OVERFLOW => "(overflow)".to_string(), + n => n.to_string(), + }) + .collect::>(); + log::info!("Results: {printed_numbers:?}"); + } +} + +fn generate_rand() -> u16 { + let mut bytes = [0u8; 2]; + getrandom::getrandom(&mut bytes[..]).unwrap(); + u16::from_le_bytes(bytes) +} + +async fn compute(local_buffer: &mut [u32], context: &WgpuContext) { + log::info!("Beginning GPU compute on data {local_buffer:?}."); + // Local buffer contents -> GPU storage buffer + // Adds a write buffer command to the queue. This command is more complicated + // than it appears. + context.queue.write_buffer( + &context.storage_buffer, + 0, + bytemuck::cast_slice(local_buffer), + ); + log::info!("Wrote to buffer."); + + let mut command_encoder = context + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + { + let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + compute_pass.set_pipeline(&context.pipeline); + compute_pass.set_bind_group(0, &context.bind_group, &[]); + compute_pass.dispatch_workgroups(local_buffer.len() as u32, 1, 1); + } + // We finish the compute pass by dropping it. + + // Entire storage buffer -> staging buffer. + command_encoder.copy_buffer_to_buffer( + &context.storage_buffer, + 0, + &context.output_staging_buffer, + 0, + context.storage_buffer.size(), + ); + + // Finalize the command encoder, add the contained commands to the queue and flush. + context.queue.submit(Some(command_encoder.finish())); + log::info!("Submitted commands."); + + // Finally time to get our results. + // First we get a buffer slice which represents a chunk of the buffer (which we + // can't access yet). + // We want the whole thing so use unbounded range. + let buffer_slice = context.output_staging_buffer.slice(..); + // Now things get complicated. WebGPU, for safety reasons, only allows either the GPU + // or CPU to access a buffer's contents at a time. We need to "map" the buffer which means + // flipping ownership of the buffer over to the CPU and making access legal. We do this + // with `BufferSlice::map_async`. + // + // The problem is that map_async is not an async function so we can't await it. What + // we need to do instead is pass in a closure that will be executed when the slice is + // either mapped or the mapping has failed. + // + // The problem with this is that we don't have a reliable way to wait in the main + // code for the buffer to be mapped and even worse, calling get_mapped_range or + // get_mapped_range_mut prematurely will cause a panic, not return an error. 
+ // + // Using channels solves this as awaiting the receiving of a message from + // the passed closure will force the outside code to wait. It also doesn't hurt + // if the closure finishes before the outside code catches up as the message is + // buffered and receiving will just pick that up. + // + // It may also be worth noting that although on native, the usage of asynchronous + // channels is wholely unnecessary, for the sake of portability to WASM (std channels + // don't work on WASM,) we'll use async channels that work on both native and WASM. + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + // In order for the mapping to be completed, one of three things must happen. + // One of those can be calling `Device::poll`. This isn't necessary on the web as devices + // are polled automatically but natively, we need to make sure this happens manually. + // `Maintain::Wait` will cause the thread to wait on native but not the web. + context.device.poll(wgpu::Maintain::Wait); + log::info!("Device polled."); + // Now we await the receiving and panic if anything went wrong because we're lazy. + receiver.receive().await.unwrap().unwrap(); + log::info!("Result received."); + // NOW we can call get_mapped_range. + { + let view = buffer_slice.get_mapped_range(); + local_buffer.copy_from_slice(bytemuck::cast_slice(&view)); + } + log::info!("Results written to local buffer."); + // We need to make sure all `BufferView`'s are dropped before we do what we're about + // to do. + // Unmap so that we can copy to the staging buffer in the next iteration. + context.output_staging_buffer.unmap(); +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .format_timestamp_nanos() + .init(); + pollster::block_on(run()); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init_with_level(log::Level::Info).expect("could not initialize logger"); + + wgpu_example::utils::add_web_nothing_to_see_msg(); + + wasm_bindgen_futures::spawn_local(run()); + } +} + +/// A convenient way to hold together all the useful wgpu stuff together. +struct WgpuContext { + device: wgpu::Device, + queue: wgpu::Queue, + pipeline: wgpu::ComputePipeline, + bind_group: wgpu::BindGroup, + storage_buffer: wgpu::Buffer, + output_staging_buffer: wgpu::Buffer, +} + +impl WgpuContext { + async fn new(buffer_size: usize) -> WgpuContext { + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + // Our shader, kindly compiled with Naga. + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!( + "shader.wgsl" + ))), + }); + + // This is where the GPU will read from and write to. 
+ let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size as wgpu::BufferAddress, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + // For portability reasons, WebGPU draws a distinction between memory that is + // accessible by the CPU and memory that is accessible by the GPU. Only + // buffers accessible by the CPU can be mapped and accessed by the CPU and + // only buffers visible to the GPU can be used in shaders. In order to get + // data from the GPU, we need to use CommandEncoder::copy_buffer_to_buffer + // (which we will later) to copy the buffer modified by the GPU into a + // mappable, CPU-accessible buffer which we'll create here. + let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + // This can be though of as the function signature for our CPU-GPU function. + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + // Going to have this be None just to be safe. + min_binding_size: None, + }, + count: None, + }], + }); + // This ties actual resources stored in the GPU to our metaphorical function + // through the binding slots we defined above. + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: storage_buffer.as_entire_binding(), + }], + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &shader, + entry_point: "main", + }); + + WgpuContext { + device, + queue, + pipeline, + bind_group, + storage_buffer, + output_staging_buffer, + } + } +} diff --git a/examples/repeated-compute/src/shader.wgsl b/examples/repeated-compute/src/shader.wgsl new file mode 100644 index 0000000000..41af4363a2 --- /dev/null +++ b/examples/repeated-compute/src/shader.wgsl @@ -0,0 +1,38 @@ +@group(0) +@binding(0) +var v_indices: array; // this is used as both input and output for convenience + +// The Collatz Conjecture states that for any integer n: +// If n is even, n = n/2 +// If n is odd, n = 3n+1 +// And repeat this process for each new n, you will always eventually reach 1. +// Though the conjecture has not been proven, no counterexample has ever been found. +// This function returns how many times this recurrence needs to be applied to reach 1. +fn collatz_iterations(n_base: u32) -> u32{ + var n: u32 = n_base; + var i: u32 = 0u; + loop { + if (n <= 1u) { + break; + } + if (n % 2u == 0u) { + n = n / 2u; + } + else { + // Overflow? (i.e. 3*n + 1 > 0xffffffffu?) 
+ if (n >= 1431655765u) { // 0x55555555u + return 4294967295u; // 0xffffffffu + } + + n = 3u * n + 1u; + } + i = i + 1u; + } + return i; +} + +@compute +@workgroup_size(1) +fn main(@builtin(global_invocation_id) global_id: vec3) { + v_indices[global_id.x] = collatz_iterations(v_indices[global_id.x]); +} diff --git a/examples/storage-texture/Cargo.toml b/examples/storage-texture/Cargo.toml new file mode 100644 index 0000000000..d3c6903e03 --- /dev/null +++ b/examples/storage-texture/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "wgpu-storage-texture-example" +version.workspace = true +license.workspace = true +edition.workspace = true +description = "wgpu storage texture example" +publish = false + +[[bin]] +name = "storage-texture" +path = "src/main.rs" + +[dependencies] +bytemuck.workspace = true +env_logger.workspace = true +futures-intrusive.workspace = true +log.workspace = true +pollster.workspace = true +wgpu.workspace = true +wgpu-example.workspace = true + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +png.workspace = true + +[target.'cfg(target_arch = "wasm32")'.dependencies] +console_error_panic_hook.workspace = true +console_log.workspace = true +wasm-bindgen.workspace = true +wasm-bindgen-futures.workspace = true +web-sys = { workspace = true, features = [ + "HtmlCanvasElement", "Document", "CanvasRenderingContext2d", "Window", "ImageData", + "HtmlImageElement" +] } + +[dev-dependencies] +wasm-bindgen-test.workspace = true +wgpu-test.workspace = true diff --git a/examples/storage-texture/README.md b/examples/storage-texture/README.md new file mode 100644 index 0000000000..51592c66a5 --- /dev/null +++ b/examples/storage-texture/README.md @@ -0,0 +1,7 @@ +# storage-texture + +A simple example that uses a storage texture to compute an image of the Mandelbrot set (https://en.wikipedia.org/wiki/Mandelbrot_set) and either saves it as an image or presents it to the browser screen in such a way that it can be saved as an image. + +## Example Output + +![Example output](./example.png) \ No newline at end of file diff --git a/examples/storage-texture/example.png b/examples/storage-texture/example.png new file mode 100644 index 0000000000..f90a220c69 Binary files /dev/null and b/examples/storage-texture/example.png differ diff --git a/examples/storage-texture/src/main.rs b/examples/storage-texture/src/main.rs new file mode 100644 index 0000000000..2e35970123 --- /dev/null +++ b/examples/storage-texture/src/main.rs @@ -0,0 +1,182 @@ +//! This example demonstrates the basic usage of storage textures for the purpose of +//! creating a digital image of the Mandelbrot set +//! (). +//! +//! Storage textures work like normal textures but they operate similar to storage buffers +//! in that they can be written to. The issue is that as it stands, write-only is the +//! only valid access mode for storage textures in WGSL and although there is a WGPU feature +//! to allow for read-write access, this is unfortunately a native-only feature and thus +//! we won't be using it here. If we needed a reference texture, we would need to add a +//! second texture to act as a reference and attach that as well. Luckily, we don't need +//! to read anything in our shader except the dimensions of our texture, which we can +//! easily get via `textureDimensions`. +//! +//! A lot of things aren't explained here via comments. See hello-compute and +//! repeated-compute for code that is more thoroughly commented. 
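To make the note above about a hypothetical read-only reference texture concrete, here is a minimal sketch of the extra bind group layout entry such a texture could use (the binding index and function name are illustrative; the example itself binds only the write-only storage texture):

```rust
// Hypothetical extra layout entry for a read-only "reference" texture,
// as mentioned in the module docs above. Not part of this example.
fn reference_texture_layout_entry() -> wgpu::BindGroupLayoutEntry {
    wgpu::BindGroupLayoutEntry {
        // Assumed binding slot; binding 0 stays the write-only storage texture.
        binding: 1,
        visibility: wgpu::ShaderStages::COMPUTE,
        ty: wgpu::BindingType::Texture {
            sample_type: wgpu::TextureSampleType::Float { filterable: false },
            view_dimension: wgpu::TextureViewDimension::D2,
            multisampled: false,
        },
        count: None,
    }
}
```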
+ +#[cfg(not(target_arch = "wasm32"))] +use wgpu_example::utils::output_image_native; +#[cfg(target_arch = "wasm32")] +use wgpu_example::utils::output_image_wasm; + +const TEXTURE_DIMS: (usize, usize) = (512, 512); + +async fn run(_path: Option) { + let mut texture_data = vec![0u8; TEXTURE_DIMS.0 * TEXTURE_DIMS.1 * 4]; + + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))), + }); + + let storage_texture = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: TEXTURE_DIMS.0 as u32, + height: TEXTURE_DIMS.1 as u32, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC, + view_formats: &[], + }); + let storage_texture_view = storage_texture.create_view(&wgpu::TextureViewDescriptor::default()); + let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: std::mem::size_of_val(&texture_data[..]) as u64, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: wgpu::TextureFormat::Rgba8Unorm, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }], + }); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&storage_texture_view), + }], + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &shader, + entry_point: "main", + }); + + log::info!("Wgpu context set up."); + //---------------------------------------- + + let mut command_encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + compute_pass.set_bind_group(0, &bind_group, &[]); + compute_pass.set_pipeline(&pipeline); + compute_pass.dispatch_workgroups(TEXTURE_DIMS.0 as u32, TEXTURE_DIMS.1 as u32, 1); + } + command_encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + texture: &storage_texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::ImageCopyBuffer { + buffer: &output_staging_buffer, + layout: wgpu::ImageDataLayout { + offset: 0, + // This needs to be 
padded to 256. + bytes_per_row: Some((TEXTURE_DIMS.0 * 4) as u32), + rows_per_image: Some(TEXTURE_DIMS.1 as u32), + }, + }, + wgpu::Extent3d { + width: TEXTURE_DIMS.0 as u32, + height: TEXTURE_DIMS.1 as u32, + depth_or_array_layers: 1, + }, + ); + queue.submit(Some(command_encoder.finish())); + + let buffer_slice = output_staging_buffer.slice(..); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.receive().await.unwrap().unwrap(); + log::info!("Output buffer mapped"); + { + let view = buffer_slice.get_mapped_range(); + texture_data.copy_from_slice(&view[..]); + } + log::info!("GPU data copied to local."); + output_staging_buffer.unmap(); + + #[cfg(not(target_arch = "wasm32"))] + output_image_native(texture_data.to_vec(), TEXTURE_DIMS, _path.unwrap()); + #[cfg(target_arch = "wasm32")] + output_image_wasm(texture_data.to_vec(), TEXTURE_DIMS); + log::info!("Done.") +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .format_timestamp_nanos() + .init(); + + let path = std::env::args() + .nth(1) + .unwrap_or_else(|| "please_don't_git_push_me.png".to_string()); + pollster::block_on(run(Some(path))); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init_with_level(log::Level::Info).expect("could not initialize logger"); + wasm_bindgen_futures::spawn_local(run(None)); + } +} diff --git a/examples/storage-texture/src/shader.wgsl b/examples/storage-texture/src/shader.wgsl new file mode 100644 index 0000000000..0dd48b3194 --- /dev/null +++ b/examples/storage-texture/src/shader.wgsl @@ -0,0 +1,29 @@ +const MAX_ITERATIONS: u32 = 50u; + +@group(0) +@binding(0) +var texture: texture_storage_2d; + +@compute +@workgroup_size(1) +fn main(@builtin(global_invocation_id) id: vec3) { + var final_iteration = MAX_ITERATIONS; + var c = vec2( + // Translated to put everything nicely in frame. 
+ (f32(id.x) / f32(textureDimensions(texture).x)) * 3.0 - 2.25, + (f32(id.y) / f32(textureDimensions(texture).y)) * 3.0 - 1.5 + ); + var current_z = c; + var next_z: vec2<f32>; + for (var i = 0u; i < MAX_ITERATIONS; i++) { + next_z.x = (current_z.x * current_z.x - current_z.y * current_z.y) + c.x; + next_z.y = (2.0 * current_z.x * current_z.y) + c.y; + current_z = next_z; + if length(current_z) > 4.0 { + final_iteration = i; + break; + } + } + let value = f32(final_iteration) / f32(MAX_ITERATIONS); + textureStore(texture, vec2<i32>(i32(id.x), i32(id.y)), vec4<f32>(value, value, value, 1.0)); +} \ No newline at end of file diff --git a/examples/uniform-values/Cargo.toml b/examples/uniform-values/Cargo.toml new file mode 100644 index 0000000000..86e3b64a4b --- /dev/null +++ b/examples/uniform-values/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "wgpu-uniform-values-example" +version.workspace = true +license.workspace = true +edition.workspace = true +description = "wgpu uniform values example" +publish = false + +[[bin]] +name = "uniform-values" +path = "src/main.rs" + +[dependencies] +encase = { version = "0.6.1", features = ["glam"] } +env_logger.workspace = true +glam = "0.24" +pollster.workspace = true +wgpu.workspace = true +winit.workspace = true + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +png.workspace = true + +[target.'cfg(target_arch = "wasm32")'.dependencies] +console_error_panic_hook.workspace = true +console_log.workspace = true +wasm-bindgen-futures.workspace = true +web-sys.workspace = true diff --git a/examples/uniform-values/README.md b/examples/uniform-values/README.md new file mode 100644 index 0000000000..19514e1e6f --- /dev/null +++ b/examples/uniform-values/README.md @@ -0,0 +1,16 @@ +# uniform-values + +Creates a window which displays a grayscale render of the [Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set). Pressing the arrow keys will translate the set and scrolling the mouse wheel will zoom in and out. If the image appears too 'bright', it may be because you are using too few iterations or 'samples'. Use U and D to increase or decrease, respectively, the max number of iterations used. Make sure to play around with this too to get an optimally photogenic screen cap. The window can be resized, and pressing ESC will close the window. Explore the Mandelbrot set using the power of uniform variables to transfer state from the main program to the shader! + +## Usage of Uniform Buffers / Variables + +Since the codebase of this example is fairly large (because why not demonstrate with a sort-of game) while the points of interest for the actual topic of the example are small, there is a module doc comment at the top of main.rs that points out the important parts of the usage of uniform values. + +## Limitations +At some point in exploring the fractal, you may discover there is actually a resolution limit; if you zoom in too deep, things become weirdly pixelated. Unfortunately, the relatively basic shader is currently limited by the precision of 32-bit floating point numbers. As much as I'd like to upgrade to 64-bit floats, support in WGSL for f64 is limited and you can't even cast to one as of the time of writing. Still pretty cool though. 
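For a self-contained picture of the encase round trip that this example relies on (described in the module doc comment of main.rs), here is a small sketch; the struct mirrors `AppState` from `src/main.rs`, and everything else is illustrative:

```rust
use encase::ShaderType;

// Mirrors the AppState struct in src/main.rs. The ShaderType derive lets
// encase lay the struct out according to WGSL uniform buffer rules.
#[derive(Debug, ShaderType)]
struct AppState {
    cursor_pos: glam::Vec2,
    zoom: f32,
    max_iterations: u32,
}

fn main() {
    let state = AppState {
        cursor_pos: glam::Vec2::ZERO,
        zoom: 1.0,
        max_iterations: 50,
    };
    let mut buffer = encase::UniformBuffer::new(Vec::<u8>::new());
    buffer
        .write(&state)
        .expect("failed to lay out AppState for WGSL");
    // These are the bytes the example uploads with Queue::write_buffer each frame.
    let bytes = buffer.into_inner();
    println!("{} bytes ready for the uniform buffer", bytes.len());
}
```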
+ +## Screenshots + +![On load](screenshot1.png) +![Zoomed in](screenshot2.png) +![A different part zoomed in](screenshot3.png) \ No newline at end of file diff --git a/examples/uniform-values/screenshot1.png b/examples/uniform-values/screenshot1.png new file mode 100644 index 0000000000..33205a7d6d Binary files /dev/null and b/examples/uniform-values/screenshot1.png differ diff --git a/examples/uniform-values/screenshot2.png b/examples/uniform-values/screenshot2.png new file mode 100644 index 0000000000..3ccb8d6a13 Binary files /dev/null and b/examples/uniform-values/screenshot2.png differ diff --git a/examples/uniform-values/screenshot3.png b/examples/uniform-values/screenshot3.png new file mode 100644 index 0000000000..a8f78a5699 Binary files /dev/null and b/examples/uniform-values/screenshot3.png differ diff --git a/examples/uniform-values/src/main.rs b/examples/uniform-values/src/main.rs new file mode 100644 index 0000000000..733a863ee4 --- /dev/null +++ b/examples/uniform-values/src/main.rs @@ -0,0 +1,363 @@ +//! Points of interest for seeing uniforms in action: +//! +//! 1. the struct for the data stored in the uniform buffer is defined. +//! 2. the uniform buffer itself is created. +//! 3. the bind group that will bind the uniform buffer and its layout are created. +//! 4. the bind group layout is attached to the pipeline layout. +//! 5. the uniform buffer and the bind group are stored alongside the pipeline. +//! 6. an instance of [`AppState`] is created. This variable will be modified +//! to change parameters in the shader and modified by app events to perform and save +//! those changes. +//! 7. (7a and 7b) the `state` variable created at (6) is modified by commands such +//! as pressing the arrow keys or zooming in or out. +//! 8. the contents of the `AppState` are loaded into the uniform buffer in preparation. +//! 9. the bind group with the uniform buffer is attached to the render pass. +//! +//! The usage of the uniform buffer within the shader itself is pretty self-explanatory given +//! some understanding of WGSL. + +// We won't bring StorageBuffer into scope as that might be too easy to confuse +// with actual GPU-allocated WGPU storage buffers. +use encase::ShaderType; +use winit::{ + event::{Event, VirtualKeyCode, WindowEvent}, + event_loop::EventLoop, + window::Window, +}; + +const ZOOM_INCREMENT_FACTOR: f32 = 1.1; +const CAMERA_POS_INCREMENT_FACTOR: f32 = 0.1; + +// (1) +#[derive(Debug, ShaderType)] +struct AppState { + pub cursor_pos: glam::Vec2, + pub zoom: f32, + pub max_iterations: u32, +} + +impl AppState { + // Translating Rust structures to WGSL is always tricky; it can prove + // incredibly difficult to remember all the rules by which WGSL + // lays out and formats structs in memory. It is also often extremely + // frustrating to debug when things don't go right. + // + // You may sometimes see structs translated to bytes through + // using `#[repr(C)]` on the struct so that the struct has a defined, + // guaranteed internal layout and then implementing bytemuck's POD + // trait so that one can perform a bitwise cast. There are issues with + // this approach though as C's struct layouts aren't always compatible + // with WGSL, such as when special WGSL types like vec's and mat's + // get involved that have special alignment rules and especially + // when the target buffer is going to be used in the uniform memory + // space. 
+ // + // Here though, we use the encase crate which makes translating potentially + // complex Rust structs easy through combined use of the [`ShaderType`] trait + // / derive macro and the buffer structs which hold data formatted for WGSL + // in either the storage or uniform spaces. + fn as_wgsl_bytes(&self) -> encase::internal::Result> { + let mut buffer = encase::UniformBuffer::new(Vec::new()); + buffer.write(self)?; + Ok(buffer.into_inner()) + } + + fn translate_view(&mut self, increments: i32, axis: usize) { + self.cursor_pos[axis] += CAMERA_POS_INCREMENT_FACTOR * increments as f32 / self.zoom; + } + + fn zoom(&mut self, amount: f32) { + self.zoom += ZOOM_INCREMENT_FACTOR * amount * self.zoom.powf(1.02); + self.zoom = self.zoom.max(1.1); + } +} + +impl Default for AppState { + fn default() -> Self { + AppState { + cursor_pos: glam::Vec2::ZERO, + zoom: 1.0, + max_iterations: 50, + } + } +} + +struct WgpuContext { + pub window: Window, + pub surface: wgpu::Surface, + pub surface_config: wgpu::SurfaceConfiguration, + pub device: wgpu::Device, + pub queue: wgpu::Queue, + pub pipeline: wgpu::RenderPipeline, + pub bind_group: wgpu::BindGroup, + pub uniform_buffer: wgpu::Buffer, +} + +impl WgpuContext { + async fn new(window: Window) -> WgpuContext { + let size = window.inner_size(); + + let instance = wgpu::Instance::default(); + let surface = unsafe { instance.create_surface(&window) }.unwrap(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions { + power_preference: wgpu::PowerPreference::HighPerformance, + compatible_surface: Some(&surface), + force_fallback_adapter: false, + }) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features: wgpu::Features::empty(), + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!( + "shader.wgsl" + ))), + }); + + // (2) + let uniform_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: std::mem::size_of::() as u64, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + // (3) + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &uniform_buffer, + offset: 0, + size: None, + }), + }], + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + // (4) + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let swapchain_capabilities = surface.get_capabilities(&adapter); + let swapchain_format = swapchain_capabilities.formats[0]; + + let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: "vs_main", + buffers: &[], + }, + 
fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: "fs_main", + targets: &[Some(swapchain_format.into())], + }), + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + }); + + let surface_config = wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: swapchain_format, + width: size.width, + height: size.height, + present_mode: wgpu::PresentMode::Fifo, + alpha_mode: swapchain_capabilities.alpha_modes[0], + view_formats: vec![], + }; + surface.configure(&device, &surface_config); + + // (5) + WgpuContext { + window, + surface, + surface_config, + device, + queue, + pipeline, + bind_group, + uniform_buffer, + } + } + + fn resize(&mut self, new_size: winit::dpi::PhysicalSize) { + self.surface_config.width = new_size.width; + self.surface_config.height = new_size.height; + self.surface.configure(&self.device, &self.surface_config); + self.window.request_redraw(); + } +} + +async fn run(event_loop: EventLoop<()>, window: Window) { + let mut wgpu_context = Some(WgpuContext::new(window).await); + // (6) + let mut state = Some(AppState::default()); + let main_window_id = wgpu_context.as_ref().unwrap().window.id(); + event_loop.run(move |event, _, control_flow| { + control_flow.set_wait(); + match event { + Event::LoopDestroyed => { + wgpu_context = None; + state = None; + } + Event::WindowEvent { window_id, event } if window_id == main_window_id => match event { + WindowEvent::CloseRequested => { + control_flow.set_exit(); + } + WindowEvent::KeyboardInput { input, .. } => { + let state_mut = state.as_mut().unwrap(); + let wgpu_context_ref = wgpu_context.as_ref().unwrap(); + if let Some(virtual_keycode) = input.virtual_keycode { + // (7a) + match virtual_keycode { + VirtualKeyCode::Escape => control_flow.set_exit(), + VirtualKeyCode::Up => state_mut.translate_view(1, 1), + VirtualKeyCode::Down => state_mut.translate_view(-1, 1), + VirtualKeyCode::Left => state_mut.translate_view(-1, 0), + VirtualKeyCode::Right => state_mut.translate_view(1, 0), + VirtualKeyCode::U => state_mut.max_iterations += 3, + VirtualKeyCode::D => state_mut.max_iterations -= 3, + _ => {} + } + wgpu_context_ref.window.request_redraw(); + } + } + WindowEvent::MouseWheel { delta, .. 
} => { + let change = match delta { + winit::event::MouseScrollDelta::LineDelta(_, vertical) => vertical, + winit::event::MouseScrollDelta::PixelDelta(pos) => pos.y as f32 / 20.0, + }; + let state_mut = state.as_mut().unwrap(); + let wgpu_context_ref = wgpu_context.as_ref().unwrap(); + // (7b) + state_mut.zoom(change); + wgpu_context_ref.window.request_redraw(); + } + WindowEvent::Resized(new_size) => { + let wgpu_context_mut = wgpu_context.as_mut().unwrap(); + wgpu_context_mut.resize(new_size); + wgpu_context_mut.window.request_redraw(); + } + _ => {} + }, + Event::RedrawRequested(id) if id == main_window_id => { + let wgpu_context_ref = wgpu_context.as_ref().unwrap(); + let state_ref = state.as_ref().unwrap(); + let frame = wgpu_context_ref.surface.get_current_texture().unwrap(); + let view = frame + .texture + .create_view(&wgpu::TextureViewDescriptor::default()); + + // (8) + wgpu_context_ref.queue.write_buffer( + &wgpu_context_ref.uniform_buffer, + 0, + &state_ref.as_wgsl_bytes().expect( + "Error in encase translating AppState \ + struct to WGSL bytes.", + ), + ); + let mut encoder = wgpu_context_ref + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + render_pass.set_pipeline(&wgpu_context_ref.pipeline); + // (9) + render_pass.set_bind_group(0, &wgpu_context_ref.bind_group, &[]); + render_pass.draw(0..3, 0..1); + } + wgpu_context_ref.queue.submit(Some(encoder.finish())); + frame.present(); + } + _ => {} + } + }); +} + +fn main() { + let event_loop = EventLoop::new(); + let window = winit::window::WindowBuilder::new() + .with_title("Remember: Use U/D to change sample count!") + .with_inner_size(winit::dpi::LogicalSize::new(900, 900)) + .build(&event_loop) + .unwrap(); + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::builder().format_timestamp_nanos().init(); + pollster::block_on(run(event_loop, window)); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init().expect("could not initialize logger"); + use winit::platform::web::WindowExtWebSys; + + let document = web_sys::window() + .and_then(|win| win.document()) + .expect("Failed to get document."); + let body = document.body().unwrap(); + body.append_child(&web_sys::Element::from(window.canvas())) + .unwrap(); + let controls_text = document + .create_element("p") + .expect("Failed to create controls text as element."); + controls_text.set_inner_html( + "Controls:
+Up, Down, Left, Right: Move view,
+Scroll: Zoom,
+U, D: Increase / decrease sample count.", + ); + body.append_child(&controls_text) + .expect("Failed to append controls text to body."); + + wasm_bindgen_futures::spawn_local(run(event_loop, window)); + } +} diff --git a/examples/uniform-values/src/shader.wgsl b/examples/uniform-values/src/shader.wgsl new file mode 100644 index 0000000000..1fdbbc44d6 --- /dev/null +++ b/examples/uniform-values/src/shader.wgsl @@ -0,0 +1,61 @@ +// Some credit to https://github.com/paulgb/wgsl-playground/tree/main. + +// We use separate x and y fields instead of a vec2 to avoid wgsl padding. +struct AppState { + pos_x: f32, + pos_y: f32, + zoom: f32, + max_iterations: u32, +} + +struct VertexInput { + @builtin(vertex_index) vertex_index: u32, +}; + +struct VertexOutput { + @builtin(position) position: vec4<f32>, + @location(0) coord: vec2<f32>, +}; + +@group(0) +@binding(0) +var<uniform> app_state: AppState; + +@vertex +fn vs_main(in: VertexInput) -> VertexOutput { + var vertices = array<vec2<f32>, 3>( + vec2<f32>(-1., 1.), + vec2<f32>(3.0, 1.), + vec2<f32>(-1., -3.0), + ); + var out: VertexOutput; + out.coord = vertices[in.vertex_index]; + out.position = vec4<f32>(out.coord, 0.0, 1.0); + + return out; +} + +@fragment +fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> { + let max_iterations = app_state.max_iterations; + var final_iteration = max_iterations; + let c = vec2<f32>( + // Translated to put everything nicely in frame. + (in.coord.x) * 3.0 / app_state.zoom + app_state.pos_x, + (in.coord.y) * 3.0 / app_state.zoom + app_state.pos_y + ); + var current_z = c; + var next_z: vec2<f32>; + for (var i = 0u; i < max_iterations; i++) { + next_z.x = (current_z.x * current_z.x - current_z.y * current_z.y) + c.x; + next_z.y = (2.0 * current_z.x * current_z.y) + c.y; + current_z = next_z; + if length(current_z) > 4.0 { + final_iteration = i; + break; + } + } + let value = f32(final_iteration) / f32(max_iterations); + + return vec4<f32>(value, value, value, 1.0); +} \ No newline at end of file