From 03ff99e2c32b863c2eedaca638c7ddac94c592f0 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Thu, 2 Jan 2025 19:30:47 -0500
Subject: [PATCH] Implement Clone on Api Types and Arc Dispatcher (#6665)

---
 tests/tests/cloneable_types.rs        | 43 ++++++++++++
 tests/tests/root.rs                   |  1 +
 wgpu/src/api/adapter.rs               | 23 ++++---
 wgpu/src/api/bind_group.rs            |  6 +-
 wgpu/src/api/bind_group_layout.rs     |  6 +-
 wgpu/src/api/blas.rs                  | 20 ++----
 wgpu/src/api/buffer.rs                | 67 +++++++++++++------
 wgpu/src/api/command_buffer.rs        | 10 ++-
 wgpu/src/api/command_encoder.rs       | 16 +++--
 wgpu/src/api/compute_pass.rs          |  4 +-
 wgpu/src/api/compute_pipeline.rs      | 10 ++-
 wgpu/src/api/device.rs                | 94 +++++++++++++++++----------
 wgpu/src/api/instance.rs              | 30 ++++++---
 wgpu/src/api/pipeline_cache.rs        |  6 +-
 wgpu/src/api/pipeline_layout.rs       |  6 +-
 wgpu/src/api/query_set.rs             |  6 +-
 wgpu/src/api/queue.rs                 | 24 ++++---
 wgpu/src/api/render_bundle.rs         |  6 +-
 wgpu/src/api/render_bundle_encoder.rs | 15 +++--
 wgpu/src/api/render_pass.rs           | 29 +++++----
 wgpu/src/api/render_pipeline.rs       | 12 ++--
 wgpu/src/api/sampler.rs               |  6 +-
 wgpu/src/api/surface.rs               | 10 +--
 wgpu/src/api/surface_texture.rs       |  8 +--
 wgpu/src/api/texture.rs               | 43 +++++++-----
 wgpu/src/api/texture_view.rs          |  6 +-
 wgpu/src/api/tlas.rs                  | 16 +++--
 wgpu/src/backend/webgpu.rs            |  8 +--
 wgpu/src/backend/wgpu_core.rs         | 45 +++++++------
 wgpu/src/util/mod.rs                  |  4 +-
 30 files changed, 374 insertions(+), 206 deletions(-)
 create mode 100644 tests/tests/cloneable_types.rs
diff --git a/tests/tests/cloneable_types.rs b/tests/tests/cloneable_types.rs
new file mode 100644
index 0000000000..91ee686cff
--- /dev/null
+++ b/tests/tests/cloneable_types.rs
@@ -0,0 +1,43 @@
+use wgpu_test::{gpu_test, TestingContext};
+
+#[gpu_test]
+static CLONEABLE_BUFFERS: GpuTestConfiguration =
+    wgpu_test::GpuTestConfiguration::new().run_sync(cloneable_buffers);
+
+// Tests a basic use of cloneable types: the buffer is cloned so that it can be
+// accessed both inside the `map_async` callback and outside of it.
+fn cloneable_buffers(ctx: TestingContext) {
+    let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: None,
+        size: 32,
+        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+        mapped_at_creation: true,
+    });
+    // Reference byte pattern 0, 1, ..., 31; both assertions below compare against it.
+    let buffer_contents: Vec<u8> = (0..32).collect();
+    // The buffer was created with `mapped_at_creation: true`, so write into it directly.
+    buffer
+        .slice(..)
+        .get_mapped_range_mut()
+        .copy_from_slice(&buffer_contents);
+
+    buffer.unmap();
+
+    // This is actually a bug: we should not need to call submit to make the buffer contents visible.
+    ctx.queue.submit([]);
+    // These clones are moved into the mapping callback; the originals stay usable afterwards.
+    let cloned_buffer = buffer.clone();
+    let cloned_buffer_contents = buffer_contents.clone();
+
+    buffer.slice(..).map_async(wgpu::MapMode::Read, move |_| {
+        let data = cloned_buffer.slice(..).get_mapped_range();
+
+        assert_eq!(&*data, &cloned_buffer_contents);
+    });
+    // NOTE(review): presumably blocks until the map request completes and its callback has run.
+    ctx.device.poll(wgpu::Maintain::Wait);
+    // Read the contents once more, this time through the original handle.
+    let data = buffer.slice(..).get_mapped_range();
+
+    assert_eq!(&*data, &buffer_contents);
+}
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 9df2b12248..f6e1272324 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -18,6 +18,7 @@ mod buffer;
 mod buffer_copy;
 mod buffer_usages;
 mod clear_texture;
+mod cloneable_types;
 mod compute_pass_ownership;
 mod create_surface_error;
 mod device;
diff --git a/wgpu/src/api/adapter.rs b/wgpu/src/api/adapter.rs
index 8fb5225bfe..bf4dac26a1 100644
--- a/wgpu/src/api/adapter.rs
+++ b/wgpu/src/api/adapter.rs
@@ -1,4 +1,4 @@
-use std::future::Future;
+use std::{future::Future, sync::Arc};
 
 use crate::*;
 
@@ -13,9 +13,9 @@ use crate::*;
 /// Does not have to be kept alive.
 ///
 /// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Adapter {
-    pub(crate) inner: dispatch::DispatchAdapter,
+    pub(crate) inner: Arc<dispatch::DispatchAdapter>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Adapter: Send, Sync);
@@ -65,9 +65,16 @@ impl Adapter {
     ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
         let device = self.inner.request_device(desc, trace_path);
         async move {
-            device
-                .await
-                .map(|(device, queue)| (Device { inner: device }, Queue { inner: queue }))
+            device.await.map(|(device, queue)| {
+                (
+                    Device {
+                        inner: Arc::new(device),
+                    },
+                    Queue {
+                        inner: Arc::new(queue),
+                    },
+                )
+            })
         }
     }
 
@@ -93,10 +100,10 @@ impl Adapter {
 
         Ok((
             Device {
-                inner: device.into(),
+                inner: Arc::new(device.into()),
             },
             Queue {
-                inner: queue.into(),
+                inner: Arc::new(queue.into()),
             },
         ))
     }
diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs
index 1cb7337855..d31db81c2a 100644
--- a/wgpu/src/api/bind_group.rs
+++ b/wgpu/src/api/bind_group.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a binding group.
@@ -8,9 +10,9 @@ use crate::*;
 /// [`ComputePass`] with [`ComputePass::set_bind_group`].
 ///
 /// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct BindGroup {
-    pub(crate) inner: dispatch::DispatchBindGroup,
+    pub(crate) inner: Arc<dispatch::DispatchBindGroup>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(BindGroup: Send, Sync);
diff --git a/wgpu/src/api/bind_group_layout.rs b/wgpu/src/api/bind_group_layout.rs
index 191752a239..beba5b7d2d 100644
--- a/wgpu/src/api/bind_group_layout.rs
+++ b/wgpu/src/api/bind_group_layout.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a binding group layout.
@@ -11,9 +13,9 @@ use crate::*;
 ///
 /// Corresponds to [WebGPU `GPUBindGroupLayout`](
 /// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct BindGroupLayout {
-    pub(crate) inner: dispatch::DispatchBindGroupLayout,
+    pub(crate) inner: Arc<dispatch::DispatchBindGroupLayout>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
diff --git a/wgpu/src/api/blas.rs b/wgpu/src/api/blas.rs
index b64c01ba8f..3f1c86b9de 100644
--- a/wgpu/src/api/blas.rs
+++ b/wgpu/src/api/blas.rs
@@ -44,7 +44,7 @@ static_assertions::assert_impl_all!(CreateBlasDescriptor<'_>: Send, Sync);
 /// [TlasPackage]: crate::TlasPackage
 #[derive(Debug, Clone)]
 pub struct TlasInstance {
-    pub(crate) blas: Arc<BlasShared>,
+    pub(crate) blas: Arc<dispatch::DispatchBlas>,
     /// Affine transform matrix 3x4 (rows x columns, row major order).
     pub transform: [f32; 12],
     /// Custom index for the instance used inside the shader.
@@ -71,7 +71,7 @@ impl TlasInstance {
     /// generate a validation error.
     pub fn new(blas: &Blas, transform: [f32; 12], custom_index: u32, mask: u8) -> Self {
         Self {
-            blas: blas.shared.clone(),
+            blas: blas.inner.clone(),
             transform,
             custom_index,
             mask,
@@ -83,7 +83,7 @@ impl TlasInstance {
     /// See the note on [TlasInstance] about the
     /// guarantees of keeping a BLAS alive.
     pub fn set_blas(&mut self, blas: &Blas) {
-        self.blas = blas.shared.clone();
+        self.blas = blas.inner.clone();
     }
 }
 
@@ -128,13 +128,7 @@ pub struct BlasBuildEntry<'a> {
 }
 static_assertions::assert_impl_all!(BlasBuildEntry<'_>: WasmNotSendSync);
 
-#[derive(Debug)]
-pub(crate) struct BlasShared {
-    pub(crate) inner: dispatch::DispatchBlas,
-}
-static_assertions::assert_impl_all!(BlasShared: WasmNotSendSync);
-
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 /// Bottom Level Acceleration Structure (BLAS).
 ///
 /// A BLAS is a device-specific raytracing acceleration structure that contains geometry data.
@@ -144,11 +138,11 @@ static_assertions::assert_impl_all!(BlasShared: WasmNotSendSync);
 /// [Tlas]: crate::Tlas
 pub struct Blas {
     pub(crate) handle: Option<u64>,
-    pub(crate) shared: Arc<BlasShared>,
+    pub(crate) inner: Arc<dispatch::DispatchBlas>,
 }
 static_assertions::assert_impl_all!(Blas: WasmNotSendSync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(Blas => .shared.inner);
+crate::cmp::impl_eq_ord_hash_proxy!(Blas => .inner);
 
 impl Blas {
     /// Raw handle to the acceleration structure, used inside raw instance buffers.
@@ -157,7 +151,7 @@ impl Blas {
     }
     /// Destroy the associated native resources as soon as possible.
     pub fn destroy(&self) {
-        self.shared.inner.destroy();
+        self.inner.destroy();
     }
 }
 
diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index eacfd9ecc5..ca393c6ad0 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -1,12 +1,22 @@
 use std::{
     error, fmt,
     ops::{Bound, Deref, DerefMut, Range, RangeBounds},
+    sync::Arc,
 };
 
 use parking_lot::Mutex;
 
 use crate::*;
 
+#[derive(Debug)]
+pub(crate) struct BufferShared {
+    pub inner: dispatch::DispatchBuffer,
+    pub map_context: Mutex<MapContext>,
+    pub size: wgt::BufferAddress,
+    pub usage: BufferUsages,
+    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
+}
+
 /// Handle to a GPU-accessible buffer.
 ///
 /// Created with [`Device::create_buffer`] or
@@ -167,18 +177,14 @@ use crate::*;
 /// [mac]: BufferDescriptor::mapped_at_creation
 /// [`MAP_READ`]: BufferUsages::MAP_READ
 /// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Buffer {
-    pub(crate) inner: dispatch::DispatchBuffer,
-    pub(crate) map_context: Mutex<MapContext>,
-    pub(crate) size: wgt::BufferAddress,
-    pub(crate) usage: BufferUsages,
-    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
+    pub(crate) shared: Arc<BufferShared>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Buffer: Send, Sync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(Buffer => .inner);
+crate::cmp::impl_eq_ord_hash_proxy!(Buffer => .shared.inner);
 
 impl Buffer {
     /// Return the binding view of the entire buffer.
@@ -206,7 +212,7 @@ impl Buffer {
         &self,
         hal_buffer_callback: F,
     ) -> R {
-        if let Some(buffer) = self.inner.as_core_opt() {
+        if let Some(buffer) = self.shared.inner.as_core_opt() {
             unsafe {
                 buffer
                     .context
@@ -233,7 +239,7 @@ impl Buffer {
     /// end of the buffer.
     pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
         let (offset, size) = range_to_offset_size(bounds);
-        check_buffer_bounds(self.size, offset, size);
+        check_buffer_bounds(self.shared.size, offset, size);
         BufferSlice {
             buffer: self,
             offset,
@@ -243,27 +249,27 @@ impl Buffer {
 
     /// Flushes any pending write operations and unmaps the buffer from host memory.
     pub fn unmap(&self) {
-        self.map_context.lock().reset();
-        self.inner.unmap();
+        self.shared.map_context.lock().reset();
+        self.shared.inner.unmap();
     }
 
     /// Destroy the associated native resources as soon as possible.
     pub fn destroy(&self) {
-        self.inner.destroy();
+        self.shared.inner.destroy();
     }
 
     /// Returns the length of the buffer allocation in bytes.
     ///
     /// This is always equal to the `size` that was specified when creating the buffer.
     pub fn size(&self) -> BufferAddress {
-        self.size
+        self.shared.size
     }
 
     /// Returns the allowed usages for this `Buffer`.
     ///
     /// This is always equal to the `usage` that was specified when creating the buffer.
     pub fn usage(&self) -> BufferUsages {
-        self.usage
+        self.shared.usage
     }
 }
 
@@ -330,7 +336,7 @@ impl<'a> BufferSlice<'a> {
         mode: MapMode,
         callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
     ) {
-        let mut mc = self.buffer.map_context.lock();
+        let mut mc = self.buffer.shared.map_context.lock();
         assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped");
         let end = match self.size {
             Some(s) => self.offset + s.get(),
@@ -339,6 +345,7 @@ impl<'a> BufferSlice<'a> {
         mc.initial_range = self.offset..end;
 
         self.buffer
+            .shared
             .inner
             .map_async(mode, self.offset..end, Box::new(callback));
     }
@@ -358,8 +365,13 @@ impl<'a> BufferSlice<'a> {
     ///
     /// [mapped]: Buffer#mapping-buffers
     pub fn get_mapped_range(&self) -> BufferView<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let range = self.buffer.inner.get_mapped_range(self.offset..end);
+        let end = self
+            .buffer
+            .shared
+            .map_context
+            .lock()
+            .add(self.offset, self.size);
+        let range = self.buffer.shared.inner.get_mapped_range(self.offset..end);
         BufferView {
             slice: *self,
             inner: range,
@@ -376,9 +388,15 @@ impl<'a> BufferSlice<'a> {
     /// This is only available on WebGPU, on any other backends this will return `None`.
     #[cfg(webgpu)]
     pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let end = self
+            .buffer
+            .shared
+            .map_context
+            .lock()
+            .add(self.offset, self.size);
 
         self.buffer
+            .shared
             .inner
             .get_mapped_range_as_array_buffer(self.offset..end)
     }
@@ -398,12 +416,17 @@ impl<'a> BufferSlice<'a> {
     ///
     /// [mapped]: Buffer#mapping-buffers
     pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let range = self.buffer.inner.get_mapped_range(self.offset..end);
+        let end = self
+            .buffer
+            .shared
+            .map_context
+            .lock()
+            .add(self.offset, self.size);
+        let range = self.buffer.shared.inner.get_mapped_range(self.offset..end);
         BufferViewMut {
             slice: *self,
             inner: range,
-            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
+            readable: self.buffer.shared.usage.contains(BufferUsages::MAP_READ),
         }
     }
 }
@@ -628,6 +651,7 @@ impl Drop for BufferView<'_> {
     fn drop(&mut self) {
         self.slice
             .buffer
+            .shared
             .map_context
             .lock()
             .remove(self.slice.offset, self.slice.size);
@@ -638,6 +662,7 @@ impl Drop for BufferViewMut<'_> {
     fn drop(&mut self) {
         self.slice
             .buffer
+            .shared
             .map_context
             .lock()
             .remove(self.slice.offset, self.slice.size);
diff --git a/wgpu/src/api/command_buffer.rs b/wgpu/src/api/command_buffer.rs
index e76ae2d5e9..b582bf1f05 100644
--- a/wgpu/src/api/command_buffer.rs
+++ b/wgpu/src/api/command_buffer.rs
@@ -1,3 +1,7 @@
+use std::sync::Arc;
+
+use parking_lot::Mutex;
+
 use crate::*;
 
 /// Handle to a command buffer on the GPU.
@@ -7,11 +11,11 @@ use crate::*;
 /// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
 ///
 /// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct CommandBuffer {
-    pub(crate) inner: Option<dispatch::DispatchCommandBuffer>,
+    pub(crate) inner: Arc<Mutex<Option<dispatch::DispatchCommandBuffer>>>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(CommandBuffer => .inner);
+crate::cmp::impl_eq_ord_hash_arc_address!(CommandBuffer => .inner);
diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs
index cd493587a7..574ec27277 100644
--- a/wgpu/src/api/command_encoder.rs
+++ b/wgpu/src/api/command_encoder.rs
@@ -1,4 +1,4 @@
-use std::ops::Range;
+use std::{ops::Range, sync::Arc};
 
 use crate::{
     api::{
@@ -35,6 +35,7 @@ crate::cmp::impl_eq_ord_hash_proxy!(CommandEncoder => .inner);
 pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
 static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
 
+use parking_lot::Mutex;
 pub use wgt::TexelCopyBufferInfo as TexelCopyBufferInfoBase;
 /// View of a buffer which can be used to copy to/from a texture.
 ///
@@ -59,7 +60,7 @@ impl CommandEncoder {
         let buffer = self.inner.finish();
 
         CommandBuffer {
-            inner: Some(buffer),
+            inner: Arc::new(Mutex::new(Some(buffer))),
         }
     }
 
@@ -121,9 +122,9 @@ impl CommandEncoder {
         copy_size: BufferAddress,
     ) {
         self.inner.copy_buffer_to_buffer(
-            &source.inner,
+            &source.shared.inner,
             source_offset,
-            &destination.inner,
+            &destination.shared.inner,
             destination_offset,
             copy_size,
         );
@@ -182,7 +183,8 @@ impl CommandEncoder {
     /// - `CLEAR_TEXTURE` extension not enabled
     /// - Range is out of bounds
     pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
-        self.inner.clear_texture(&texture.inner, subresource_range);
+        self.inner
+            .clear_texture(&texture.shared.inner, subresource_range);
     }
 
     /// Clears buffer to zero.
@@ -197,7 +199,7 @@ impl CommandEncoder {
         offset: BufferAddress,
         size: Option<BufferAddress>,
     ) {
-        self.inner.clear_buffer(&buffer.inner, offset, size);
+        self.inner.clear_buffer(&buffer.shared.inner, offset, size);
     }
 
     /// Inserts debug marker.
@@ -230,7 +232,7 @@ impl CommandEncoder {
             &query_set.inner,
             query_range.start,
             query_range.end - query_range.start,
-            &destination.inner,
+            &destination.shared.inner,
             destination_offset,
         );
     }
diff --git a/wgpu/src/api/compute_pass.rs b/wgpu/src/api/compute_pass.rs
index c7fa7462f1..81e179aff9 100644
--- a/wgpu/src/api/compute_pass.rs
+++ b/wgpu/src/api/compute_pass.rs
@@ -52,7 +52,7 @@ impl ComputePass<'_> {
         Option<&'a BindGroup>: From<BG>,
     {
         let bg: Option<&BindGroup> = bind_group.into();
-        let bg = bg.map(|bg| &bg.inner);
+        let bg = bg.map(|bg| &*bg.inner);
         self.inner.set_bind_group(index, bg, offsets);
     }
 
@@ -92,7 +92,7 @@ impl ComputePass<'_> {
         indirect_offset: BufferAddress,
     ) {
         self.inner
-            .dispatch_workgroups_indirect(&indirect_buffer.inner, indirect_offset);
+            .dispatch_workgroups_indirect(&indirect_buffer.shared.inner, indirect_offset);
     }
 }
 
diff --git a/wgpu/src/api/compute_pipeline.rs b/wgpu/src/api/compute_pipeline.rs
index b1919301cc..9ccf85c599 100644
--- a/wgpu/src/api/compute_pipeline.rs
+++ b/wgpu/src/api/compute_pipeline.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a compute pipeline.
@@ -6,9 +8,9 @@ use crate::*;
 /// It can be created with [`Device::create_compute_pipeline`].
 ///
 /// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ComputePipeline {
-    pub(crate) inner: dispatch::DispatchComputePipeline,
+    pub(crate) inner: Arc<dispatch::DispatchComputePipeline>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
@@ -25,7 +27,9 @@ impl ComputePipeline {
     /// This method will raise a validation error if there is no bind group layout at `index`.
     pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
         let bind_group = self.inner.get_bind_group_layout(index);
-        BindGroupLayout { inner: bind_group }
+        BindGroupLayout {
+            inner: Arc::new(bind_group),
+        }
     }
 }
 
diff --git a/wgpu/src/api/device.rs b/wgpu/src/api/device.rs
index 40a0b0204d..ddc8641619 100644
--- a/wgpu/src/api/device.rs
+++ b/wgpu/src/api/device.rs
@@ -2,7 +2,7 @@ use std::{error, fmt, future::Future, sync::Arc};
 
 use parking_lot::Mutex;
 
-use crate::api::blas::{Blas, BlasGeometrySizeDescriptors, BlasShared, CreateBlasDescriptor};
+use crate::api::blas::{Blas, BlasGeometrySizeDescriptors, CreateBlasDescriptor};
 use crate::api::tlas::{CreateTlasDescriptor, Tlas};
 use crate::*;
 
@@ -14,9 +14,9 @@ use crate::*;
 /// A device may be requested from an adapter with [`Adapter::request_device`].
 ///
 /// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Device {
-    pub(crate) inner: dispatch::DispatchDevice,
+    pub(crate) inner: Arc<dispatch::DispatchDevice>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Device: Send, Sync);
@@ -170,7 +170,9 @@ impl Device {
     #[must_use]
     pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
         let group = self.inner.create_bind_group(desc);
-        BindGroup { inner: group }
+        BindGroup {
+            inner: Arc::new(group),
+        }
     }
 
     /// Creates a [`BindGroupLayout`].
@@ -180,28 +182,36 @@ impl Device {
         desc: &BindGroupLayoutDescriptor<'_>,
     ) -> BindGroupLayout {
         let layout = self.inner.create_bind_group_layout(desc);
-        BindGroupLayout { inner: layout }
+        BindGroupLayout {
+            inner: Arc::new(layout),
+        }
     }
 
     /// Creates a [`PipelineLayout`].
     #[must_use]
     pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout {
         let layout = self.inner.create_pipeline_layout(desc);
-        PipelineLayout { inner: layout }
+        PipelineLayout {
+            inner: Arc::new(layout),
+        }
     }
 
     /// Creates a [`RenderPipeline`].
     #[must_use]
     pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline {
         let pipeline = self.inner.create_render_pipeline(desc);
-        RenderPipeline { inner: pipeline }
+        RenderPipeline {
+            inner: Arc::new(pipeline),
+        }
     }
 
     /// Creates a [`ComputePipeline`].
     #[must_use]
     pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline {
         let pipeline = self.inner.create_compute_pipeline(desc);
-        ComputePipeline { inner: pipeline }
+        ComputePipeline {
+            inner: Arc::new(pipeline),
+        }
     }
 
     /// Creates a [`Buffer`].
@@ -215,10 +225,12 @@ impl Device {
         let buffer = self.inner.create_buffer(desc);
 
         Buffer {
-            inner: buffer,
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
+            shared: Arc::new(BufferShared {
+                inner: buffer,
+                map_context: Mutex::new(map_context),
+                size: desc.size,
+                usage: desc.usage,
+            }),
         }
     }
 
@@ -230,12 +242,14 @@ impl Device {
         let texture = self.inner.create_texture(desc);
 
         Texture {
-            inner: texture,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
+            shared: Arc::new(TextureShared {
+                inner: texture,
+                descriptor: TextureDescriptor {
+                    label: None,
+                    view_formats: &[],
+                    ..desc.clone()
+                },
+            }),
         }
     }
 
@@ -260,12 +274,14 @@ impl Device {
                 .create_texture_from_hal::<A>(hal_texture, core_device, desc)
         };
         Texture {
-            inner: texture.into(),
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
+            shared: Arc::new(TextureShared {
+                inner: texture.into(),
+                descriptor: TextureDescriptor {
+                    label: None,
+                    view_formats: &[],
+                    ..desc.clone()
+                },
+            }),
         }
     }
 
@@ -296,10 +312,12 @@ impl Device {
         };
 
         Buffer {
-            inner: buffer.into(),
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
+            shared: Arc::new(BufferShared {
+                inner: buffer.into(),
+                map_context: Mutex::new(map_context),
+                size: desc.size,
+                usage: desc.usage,
+            }),
         }
     }
 
@@ -309,14 +327,18 @@ impl Device {
     #[must_use]
     pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
         let sampler = self.inner.create_sampler(desc);
-        Sampler { inner: sampler }
+        Sampler {
+            inner: Arc::new(sampler),
+        }
     }
 
     /// Creates a new [`QuerySet`].
     #[must_use]
     pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
         let query_set = self.inner.create_query_set(desc);
-        QuerySet { inner: query_set }
+        QuerySet {
+            inner: Arc::new(query_set),
+        }
     }
 
     /// Set a callback for errors that are not handled in error scopes.
@@ -456,7 +478,9 @@ impl Device {
         desc: &PipelineCacheDescriptor<'_>,
     ) -> PipelineCache {
         let cache = unsafe { self.inner.create_pipeline_cache(desc) };
-        PipelineCache { inner: cache }
+        PipelineCache {
+            inner: Arc::new(cache),
+        }
     }
 }
 
@@ -487,7 +511,7 @@ impl Device {
         let (handle, blas) = self.inner.create_blas(desc, sizes);
 
         Blas {
-            shared: Arc::new(BlasShared { inner: blas }),
+            inner: Arc::new(blas),
             handle,
         }
     }
@@ -506,8 +530,10 @@ impl Device {
         let tlas = self.inner.create_tlas(desc);
 
         Tlas {
-            inner: tlas,
-            max_instances: desc.max_instances,
+            shared: Arc::new(TlasShared {
+                inner: tlas,
+                max_instances: desc.max_instances,
+            }),
         }
     }
 }
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index f03e348183..81bbe8af5a 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -2,7 +2,7 @@ use parking_lot::Mutex;
 
 use crate::{dispatch::InstanceInterface, *};
 
-use std::future::Future;
+use std::{future::Future, sync::Arc};
 
 /// Context for all other wgpu objects. Instance of wgpu.
 ///
@@ -12,9 +12,9 @@ use std::future::Future;
 /// Does not have to be kept alive.
 ///
 /// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Instance {
-    inner: dispatch::DispatchInstance,
+    inner: Arc<dispatch::DispatchInstance>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Instance: Send, Sync);
@@ -131,7 +131,7 @@ impl Instance {
 
             if is_only_available_backend || (requested_webgpu && support_webgpu) {
                 return Self {
-                    inner: crate::backend::ContextWebGpu::new(_instance_desc).into(),
+                    inner: Arc::new(crate::backend::ContextWebGpu::new(_instance_desc).into()),
                 };
             }
         }
@@ -139,7 +139,7 @@ impl Instance {
         #[cfg(wgpu_core)]
         {
             return Self {
-                inner: crate::backend::ContextWgpuCore::new(_instance_desc).into(),
+                inner: Arc::new(crate::backend::ContextWgpuCore::new(_instance_desc).into()),
             };
         }
 
@@ -161,7 +161,9 @@ impl Instance {
     pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
         Self {
             inner: unsafe {
-                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance).into()
+                Arc::new(
+                    crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance).into(),
+                )
             },
         }
     }
@@ -196,7 +198,7 @@ impl Instance {
     pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
         Self {
             inner: unsafe {
-                crate::backend::ContextWgpuCore::from_core_instance(core_instance).into()
+                Arc::new(crate::backend::ContextWgpuCore::from_core_instance(core_instance).into())
             },
         }
     }
@@ -220,7 +222,9 @@ impl Instance {
                     context: core_instance.clone(),
                     id: adapter,
                 };
-                crate::Adapter { inner: core.into() }
+                crate::Adapter {
+                    inner: Arc::new(core.into()),
+                }
             })
             .collect()
     }
@@ -237,7 +241,11 @@ impl Instance {
         options: &RequestAdapterOptions<'_, '_>,
     ) -> impl Future<Output = Option<Adapter>> + WasmNotSend {
         let future = self.inner.request_adapter(options);
-        async move { future.await.map(|inner| Adapter { inner }) }
+        async move {
+            future.await.map(|adapter| Adapter {
+                inner: Arc::new(adapter),
+            })
+        }
     }
 
     /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`].
@@ -257,7 +265,9 @@ impl Instance {
             id: adapter,
         };
 
-        Adapter { inner: core.into() }
+        Adapter {
+            inner: Arc::new(core.into()),
+        }
     }
 
     /// Creates a new surface targeting a given window/canvas/surface/etc..
diff --git a/wgpu/src/api/pipeline_cache.rs b/wgpu/src/api/pipeline_cache.rs
index 4462a405eb..3ed6aea96c 100644
--- a/wgpu/src/api/pipeline_cache.rs
+++ b/wgpu/src/api/pipeline_cache.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a pipeline cache, which is used to accelerate
@@ -62,9 +64,9 @@ use crate::*;
 /// This type is unique to the Rust API of `wgpu`.
 ///
 /// [renaming]: std::fs::rename
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct PipelineCache {
-    pub(crate) inner: dispatch::DispatchPipelineCache,
+    pub(crate) inner: Arc<dispatch::DispatchPipelineCache>,
 }
 
 #[cfg(send_sync)]
diff --git a/wgpu/src/api/pipeline_layout.rs b/wgpu/src/api/pipeline_layout.rs
index 604dd78efd..5a37a791fc 100644
--- a/wgpu/src/api/pipeline_layout.rs
+++ b/wgpu/src/api/pipeline_layout.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a pipeline layout.
@@ -6,9 +8,9 @@ use crate::*;
 /// It can be created with [`Device::create_pipeline_layout`].
 ///
 /// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct PipelineLayout {
-    pub(crate) inner: dispatch::DispatchPipelineLayout,
+    pub(crate) inner: Arc<dispatch::DispatchPipelineLayout>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
diff --git a/wgpu/src/api/query_set.rs b/wgpu/src/api/query_set.rs
index a0d358ed4d..41eb1c5c5f 100644
--- a/wgpu/src/api/query_set.rs
+++ b/wgpu/src/api/query_set.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a query set.
@@ -5,9 +7,9 @@ use crate::*;
 /// It can be created with [`Device::create_query_set`].
 ///
 /// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct QuerySet {
-    pub(crate) inner: dispatch::DispatchQuerySet,
+    pub(crate) inner: Arc<dispatch::DispatchQuerySet>,
 }
 #[cfg(send_sync)]
 #[cfg(send_sync)]
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index 89f505d572..ad96ed0234 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -1,4 +1,7 @@
-use std::ops::{Deref, DerefMut};
+use std::{
+    ops::{Deref, DerefMut},
+    sync::Arc,
+};
 
 use crate::*;
 
@@ -9,9 +12,9 @@ use crate::*;
 /// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
 ///
 /// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Queue {
-    pub(crate) inner: dispatch::DispatchQueue,
+    pub(crate) inner: Arc<dispatch::DispatchQueue>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Queue: Send, Sync);
@@ -77,7 +80,7 @@ impl Drop for QueueWriteBufferView<'_> {
     fn drop(&mut self) {
         self.queue
             .inner
-            .write_staging_buffer(&self.buffer.inner, self.offset, &self.inner);
+            .write_staging_buffer(&self.buffer.shared.inner, self.offset, &self.inner);
     }
 }
 
@@ -103,7 +106,7 @@ impl Queue {
     /// method avoids an intermediate copy and is often able to transfer data
     /// more efficiently than this one.
     pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
-        self.inner.write_buffer(&buffer.inner, offset, data);
+        self.inner.write_buffer(&buffer.shared.inner, offset, data);
     }
 
     /// Write to a buffer via a directly mapped staging buffer.
@@ -143,7 +146,7 @@ impl Queue {
     ) -> Option<QueueWriteBufferView<'a>> {
         profiling::scope!("Queue::write_buffer_with");
         self.inner
-            .validate_write_buffer(&buffer.inner, offset, size)?;
+            .validate_write_buffer(&buffer.shared.inner, offset, size)?;
         let staging_buffer = self.inner.create_staging_buffer(size)?;
         Some(QueueWriteBufferView {
             queue: self,
@@ -204,9 +207,12 @@ impl Queue {
         &self,
         command_buffers: I,
     ) -> SubmissionIndex {
-        let mut command_buffers = command_buffers
-            .into_iter()
-            .map(|mut comb| comb.inner.take().unwrap());
+        let mut command_buffers = command_buffers.into_iter().map(|comb| {
+            comb.inner
+                .lock()
+                .take()
+                .expect("Command buffer already submitted")
+        });
 
         let index = self.inner.submit(&mut command_buffers);
 
diff --git a/wgpu/src/api/render_bundle.rs b/wgpu/src/api/render_bundle.rs
index 1d603eab6b..6cf7f2ea9f 100644
--- a/wgpu/src/api/render_bundle.rs
+++ b/wgpu/src/api/render_bundle.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Pre-prepared reusable bundle of GPU operations.
@@ -9,9 +11,9 @@ use crate::*;
 /// using [`RenderPass::execute_bundles`].
 ///
 /// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct RenderBundle {
-    pub(crate) inner: dispatch::DispatchRenderBundle,
+    pub(crate) inner: Arc<dispatch::DispatchRenderBundle>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
diff --git a/wgpu/src/api/render_bundle_encoder.rs b/wgpu/src/api/render_bundle_encoder.rs
index af0a225102..308a9c8389 100644
--- a/wgpu/src/api/render_bundle_encoder.rs
+++ b/wgpu/src/api/render_bundle_encoder.rs
@@ -1,3 +1,4 @@
+use std::sync::Arc;
 use std::{marker::PhantomData, num::NonZeroU32, ops::Range};
 
 use crate::dispatch::RenderBundleEncoderInterface;
@@ -59,7 +60,9 @@ impl<'a> RenderBundleEncoder<'a> {
             dispatch::DispatchRenderBundleEncoder::WebGPU(b) => b.finish(desc),
         };
 
-        RenderBundle { inner: bundle }
+        RenderBundle {
+            inner: Arc::new(bundle),
+        }
     }
 
     /// Sets the active bind group for a given bind group index. The bind group layout
@@ -71,7 +74,7 @@ impl<'a> RenderBundleEncoder<'a> {
         Option<&'b BindGroup>: From<BG>,
     {
         let bg: Option<&'b BindGroup> = bind_group.into();
-        let bg = bg.map(|x| &x.inner);
+        let bg = bg.map(|x| &*x.inner);
         self.inner.set_bind_group(index, bg, offsets);
     }
 
@@ -88,7 +91,7 @@ impl<'a> RenderBundleEncoder<'a> {
     /// use `buffer` as the source index buffer.
     pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
         self.inner.set_index_buffer(
-            &buffer_slice.buffer.inner,
+            &buffer_slice.buffer.shared.inner,
             index_format,
             buffer_slice.offset,
             buffer_slice.size,
@@ -108,7 +111,7 @@ impl<'a> RenderBundleEncoder<'a> {
     pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
         self.inner.set_vertex_buffer(
             slot,
-            &buffer_slice.buffer.inner,
+            &buffer_slice.buffer.shared.inner,
             buffer_slice.offset,
             buffer_slice.size,
         );
@@ -168,7 +171,7 @@ impl<'a> RenderBundleEncoder<'a> {
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
     pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
         self.inner
-            .draw_indirect(&indirect_buffer.inner, indirect_offset);
+            .draw_indirect(&indirect_buffer.shared.inner, indirect_offset);
     }
 
     /// Draws indexed primitives using the active index buffer and the active vertex buffers,
@@ -184,7 +187,7 @@ impl<'a> RenderBundleEncoder<'a> {
         indirect_offset: BufferAddress,
     ) {
         self.inner
-            .draw_indexed_indirect(&indirect_buffer.inner, indirect_offset);
+            .draw_indexed_indirect(&indirect_buffer.shared.inner, indirect_offset);
     }
 }
 
diff --git a/wgpu/src/api/render_pass.rs b/wgpu/src/api/render_pass.rs
index 6802025635..bdf711fa41 100644
--- a/wgpu/src/api/render_pass.rs
+++ b/wgpu/src/api/render_pass.rs
@@ -70,7 +70,7 @@ impl RenderPass<'_> {
         Option<&'a BindGroup>: From<BG>,
     {
         let bg: Option<&'a BindGroup> = bind_group.into();
-        let bg = bg.map(|bg| &bg.inner);
+        let bg = bg.map(|bg| &*bg.inner);
 
         self.inner.set_bind_group(index, bg, offsets);
     }
@@ -97,7 +97,7 @@ impl RenderPass<'_> {
     /// use `buffer` as the source index buffer.
     pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
         self.inner.set_index_buffer(
-            &buffer_slice.buffer.inner,
+            &buffer_slice.buffer.shared.inner,
             index_format,
             buffer_slice.offset,
             buffer_slice.size,
@@ -117,7 +117,7 @@ impl RenderPass<'_> {
     pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
         self.inner.set_vertex_buffer(
             slot,
-            &buffer_slice.buffer.inner,
+            &buffer_slice.buffer.shared.inner,
             buffer_slice.offset,
             buffer_slice.size,
         );
@@ -237,7 +237,7 @@ impl RenderPass<'_> {
     /// See details on the individual flags for more information.
     pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
         self.inner
-            .draw_indirect(&indirect_buffer.inner, indirect_offset);
+            .draw_indirect(&indirect_buffer.shared.inner, indirect_offset);
     }
 
     /// Draws indexed primitives using the active index buffer and the active vertex buffers,
@@ -260,7 +260,7 @@ impl RenderPass<'_> {
         indirect_offset: BufferAddress,
     ) {
         self.inner
-            .draw_indexed_indirect(&indirect_buffer.inner, indirect_offset);
+            .draw_indexed_indirect(&indirect_buffer.shared.inner, indirect_offset);
     }
 
     /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
@@ -272,7 +272,7 @@ impl RenderPass<'_> {
         &mut self,
         render_bundles: I,
     ) {
-        let mut render_bundles = render_bundles.into_iter().map(|rb| &rb.inner);
+        let mut render_bundles = render_bundles.into_iter().map(|rb| &*rb.inner);
 
         self.inner.execute_bundles(&mut render_bundles);
     }
@@ -297,7 +297,7 @@ impl RenderPass<'_> {
         count: u32,
     ) {
         self.inner
-            .multi_draw_indirect(&indirect_buffer.inner, indirect_offset, count);
+            .multi_draw_indirect(&indirect_buffer.shared.inner, indirect_offset, count);
     }
 
     /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
@@ -317,8 +317,11 @@ impl RenderPass<'_> {
         indirect_offset: BufferAddress,
         count: u32,
     ) {
-        self.inner
-            .multi_draw_indexed_indirect(&indirect_buffer.inner, indirect_offset, count);
+        self.inner.multi_draw_indexed_indirect(
+            &indirect_buffer.shared.inner,
+            indirect_offset,
+            count,
+        );
     }
 }
 
@@ -355,9 +358,9 @@ impl RenderPass<'_> {
         max_count: u32,
     ) {
         self.inner.multi_draw_indirect_count(
-            &indirect_buffer.inner,
+            &indirect_buffer.shared.inner,
             indirect_offset,
-            &count_buffer.inner,
+            &count_buffer.shared.inner,
             count_offset,
             max_count,
         );
@@ -397,9 +400,9 @@ impl RenderPass<'_> {
         max_count: u32,
     ) {
         self.inner.multi_draw_indexed_indirect_count(
-            &indirect_buffer.inner,
+            &indirect_buffer.shared.inner,
             indirect_offset,
-            &count_buffer.inner,
+            &count_buffer.shared.inner,
             count_offset,
             max_count,
         );
diff --git a/wgpu/src/api/render_pipeline.rs b/wgpu/src/api/render_pipeline.rs
index 71131e941e..a215ab7bab 100644
--- a/wgpu/src/api/render_pipeline.rs
+++ b/wgpu/src/api/render_pipeline.rs
@@ -1,4 +1,4 @@
-use std::num::NonZeroU32;
+use std::{num::NonZeroU32, sync::Arc};
 
 use crate::*;
 
@@ -8,9 +8,9 @@ use crate::*;
 /// buffers and targets. It can be created with [`Device::create_render_pipeline`].
 ///
 /// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct RenderPipeline {
-    pub(crate) inner: dispatch::DispatchRenderPipeline,
+    pub(crate) inner: Arc<dispatch::DispatchRenderPipeline>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
@@ -25,8 +25,10 @@ impl RenderPipeline {
     ///
     /// This method will raise a validation error if there is no bind group layout at `index`.
     pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let inner = self.inner.get_bind_group_layout(index);
-        BindGroupLayout { inner }
+        let layout = self.inner.get_bind_group_layout(index);
+        BindGroupLayout {
+            inner: Arc::new(layout),
+        }
     }
 }
 
diff --git a/wgpu/src/api/sampler.rs b/wgpu/src/api/sampler.rs
index 4c57819c99..7302404cdd 100644
--- a/wgpu/src/api/sampler.rs
+++ b/wgpu/src/api/sampler.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a sampler.
@@ -9,9 +11,9 @@ use crate::*;
 /// It can be created with [`Device::create_sampler`].
 ///
 /// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Sampler {
-    pub(crate) inner: dispatch::DispatchSampler,
+    pub(crate) inner: Arc<dispatch::DispatchSampler>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Sampler: Send, Sync);
diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs
index 54a916c4d0..a165079928 100644
--- a/wgpu/src/api/surface.rs
+++ b/wgpu/src/api/surface.rs
@@ -1,4 +1,4 @@
-use std::{error, fmt};
+use std::{error, fmt, sync::Arc};
 
 use parking_lot::Mutex;
 use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
@@ -128,12 +128,14 @@ impl Surface<'_> {
         texture
             .map(|texture| SurfaceTexture {
                 texture: Texture {
-                    inner: texture,
-                    descriptor,
+                    shared: Arc::new(TextureShared {
+                        inner: texture,
+                        descriptor,
+                    }),
                 },
                 suboptimal,
                 presented: false,
-                detail,
+                detail: Arc::new(detail),
             })
             .ok_or(SurfaceError::Lost)
     }
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs
index 5059799888..0af392e109 100644
--- a/wgpu/src/api/surface_texture.rs
+++ b/wgpu/src/api/surface_texture.rs
@@ -1,4 +1,4 @@
-use std::{error, fmt, thread};
+use std::{error, fmt, sync::Arc, thread};
 
 use crate::*;
 
@@ -8,7 +8,7 @@ use crate::*;
 /// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
 /// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
 /// a texture without any additional information.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct SurfaceTexture {
     /// Accessible view of the frame.
     pub texture: Texture,
@@ -16,12 +16,12 @@ pub struct SurfaceTexture {
     /// but should be recreated for maximum performance.
     pub suboptimal: bool,
     pub(crate) presented: bool,
-    pub(crate) detail: dispatch::DispatchSurfaceOutputDetail,
+    pub(crate) detail: Arc<dispatch::DispatchSurfaceOutputDetail>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(SurfaceTexture => .texture.inner);
+crate::cmp::impl_eq_ord_hash_proxy!(SurfaceTexture => .texture.shared.inner);
 
 impl SurfaceTexture {
     /// Schedule this texture to be presented on the owning surface.
diff --git a/wgpu/src/api/texture.rs b/wgpu/src/api/texture.rs
index 3fdecd320b..32b9817231 100644
--- a/wgpu/src/api/texture.rs
+++ b/wgpu/src/api/texture.rs
@@ -1,19 +1,26 @@
+use std::sync::Arc;
+
 use crate::*;
 
+#[derive(Debug)]
+pub(crate) struct TextureShared {
+    pub(crate) inner: dispatch::DispatchTexture,
+    pub(crate) descriptor: TextureDescriptor<'static>,
+}
+
 /// Handle to a texture on the GPU.
 ///
 /// It can be created with [`Device::create_texture`].
 ///
 /// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Texture {
-    pub(crate) inner: dispatch::DispatchTexture,
-    pub(crate) descriptor: TextureDescriptor<'static>,
+    pub(crate) shared: Arc<TextureShared>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Texture: Send, Sync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(Texture => .inner);
+crate::cmp::impl_eq_ord_hash_proxy!(Texture => .shared.inner);
 
 impl Texture {
     /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the
@@ -27,7 +34,7 @@ impl Texture {
         &self,
         hal_texture_callback: F,
     ) -> R {
-        if let Some(tex) = self.inner.as_core_opt() {
+        if let Some(tex) = self.shared.inner.as_core_opt() {
             unsafe {
                 tex.context
                     .texture_as_hal::<A, F, R>(tex, hal_texture_callback)
@@ -39,14 +46,16 @@ impl Texture {
 
     /// Creates a view of this texture.
     pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
-        let view = self.inner.create_view(desc);
+        let view = self.shared.inner.create_view(desc);
 
-        TextureView { inner: view }
+        TextureView {
+            inner: Arc::new(view),
+        }
     }
 
     /// Destroy the associated native resources as soon as possible.
     pub fn destroy(&self) {
-        self.inner.destroy();
+        self.shared.inner.destroy();
     }
 
     /// Make an `TexelCopyTextureInfo` representing the whole texture.
@@ -63,63 +72,63 @@ impl Texture {
     ///
     /// This is always equal to the `size` that was specified when creating the texture.
     pub fn size(&self) -> Extent3d {
-        self.descriptor.size
+        self.shared.descriptor.size
     }
 
     /// Returns the width of this `Texture`.
     ///
     /// This is always equal to the `size.width` that was specified when creating the texture.
     pub fn width(&self) -> u32 {
-        self.descriptor.size.width
+        self.shared.descriptor.size.width
     }
 
     /// Returns the height of this `Texture`.
     ///
     /// This is always equal to the `size.height` that was specified when creating the texture.
     pub fn height(&self) -> u32 {
-        self.descriptor.size.height
+        self.shared.descriptor.size.height
     }
 
     /// Returns the depth or layer count of this `Texture`.
     ///
     /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
     pub fn depth_or_array_layers(&self) -> u32 {
-        self.descriptor.size.depth_or_array_layers
+        self.shared.descriptor.size.depth_or_array_layers
     }
 
     /// Returns the mip_level_count of this `Texture`.
     ///
     /// This is always equal to the `mip_level_count` that was specified when creating the texture.
     pub fn mip_level_count(&self) -> u32 {
-        self.descriptor.mip_level_count
+        self.shared.descriptor.mip_level_count
     }
 
     /// Returns the sample_count of this `Texture`.
     ///
     /// This is always equal to the `sample_count` that was specified when creating the texture.
     pub fn sample_count(&self) -> u32 {
-        self.descriptor.sample_count
+        self.shared.descriptor.sample_count
     }
 
     /// Returns the dimension of this `Texture`.
     ///
     /// This is always equal to the `dimension` that was specified when creating the texture.
     pub fn dimension(&self) -> TextureDimension {
-        self.descriptor.dimension
+        self.shared.descriptor.dimension
     }
 
     /// Returns the format of this `Texture`.
     ///
     /// This is always equal to the `format` that was specified when creating the texture.
     pub fn format(&self) -> TextureFormat {
-        self.descriptor.format
+        self.shared.descriptor.format
     }
 
     /// Returns the allowed usages of this `Texture`.
     ///
     /// This is always equal to the `usage` that was specified when creating the texture.
     pub fn usage(&self) -> TextureUsages {
-        self.descriptor.usage
+        self.shared.descriptor.usage
     }
 }
 
diff --git a/wgpu/src/api/texture_view.rs b/wgpu/src/api/texture_view.rs
index f255603bcb..5c5d2a553d 100644
--- a/wgpu/src/api/texture_view.rs
+++ b/wgpu/src/api/texture_view.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use crate::*;
 
 /// Handle to a texture view.
@@ -6,9 +8,9 @@ use crate::*;
 /// [`RenderPipeline`] or [`BindGroup`].
 ///
 /// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct TextureView {
-    pub(crate) inner: dispatch::DispatchTextureView,
+    pub(crate) inner: Arc<dispatch::DispatchTextureView>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(TextureView: Send, Sync);
diff --git a/wgpu/src/api/tlas.rs b/wgpu/src/api/tlas.rs
index 538f4e16c2..b260951152 100644
--- a/wgpu/src/api/tlas.rs
+++ b/wgpu/src/api/tlas.rs
@@ -1,6 +1,7 @@
 use crate::{api::blas::TlasInstance, dispatch};
 use crate::{BindingResource, Buffer, Label};
 use std::ops::{Index, IndexMut, Range};
+use std::sync::Arc;
 use wgt::WasmNotSendSync;
 
 /// Descriptor to create top level acceleration structures.
@@ -8,6 +9,12 @@ pub type CreateTlasDescriptor<'a> = wgt::CreateTlasDescriptor<Label<'a>>;
 static_assertions::assert_impl_all!(CreateTlasDescriptor<'_>: Send, Sync);
 
 #[derive(Debug)]
+pub(crate) struct TlasShared {
+    pub(crate) inner: dispatch::DispatchTlas,
+    pub(crate) max_instances: u32,
+}
+
+#[derive(Debug, Clone)]
 /// Top Level Acceleration Structure (TLAS).
 ///
 /// A TLAS contains a series of [TLAS instances], which are a reference to
@@ -18,17 +25,16 @@ static_assertions::assert_impl_all!(CreateTlasDescriptor<'_>: Send, Sync);
 ///
 /// [TLAS instances]: TlasInstance
 pub struct Tlas {
-    pub(crate) inner: dispatch::DispatchTlas,
-    pub(crate) max_instances: u32,
+    pub(crate) shared: Arc<TlasShared>,
 }
 static_assertions::assert_impl_all!(Tlas: WasmNotSendSync);
 
-crate::cmp::impl_eq_ord_hash_proxy!(Tlas => .inner);
+crate::cmp::impl_eq_ord_hash_proxy!(Tlas => .shared.inner);
 
 impl Tlas {
     /// Destroy the associated native resources as soon as possible.
     pub fn destroy(&self) {
-        self.inner.destroy();
+        self.shared.inner.destroy();
     }
 }
 
@@ -56,7 +62,7 @@ static_assertions::assert_impl_all!(TlasPackage: WasmNotSendSync);
 impl TlasPackage {
     /// Construct [TlasPackage] consuming the [Tlas] (prevents modification of the [Tlas] without using this package).
     pub fn new(tlas: Tlas) -> Self {
-        let max_instances = tlas.max_instances;
+        let max_instances = tlas.shared.max_instances;
         Self::new_with_instances(tlas, vec![None; max_instances as usize])
     }
 
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index b2f1f19079..15cab8790f 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -634,7 +634,7 @@ fn map_texture_view_dimension(
 fn map_buffer_copy_view(
     view: crate::TexelCopyBufferInfo<'_>,
 ) -> webgpu_sys::GpuTexelCopyBufferInfo {
-    let buffer = view.buffer.inner.as_webgpu();
+    let buffer = view.buffer.shared.inner.as_webgpu();
     let mapped = webgpu_sys::GpuTexelCopyBufferInfo::new(&buffer.inner);
     if let Some(bytes_per_row) = view.layout.bytes_per_row {
         mapped.set_bytes_per_row(bytes_per_row);
@@ -649,7 +649,7 @@ fn map_buffer_copy_view(
 fn map_texture_copy_view(
     view: crate::TexelCopyTextureInfo<'_>,
 ) -> webgpu_sys::GpuTexelCopyTextureInfo {
-    let texture = view.texture.inner.as_webgpu();
+    let texture = view.texture.shared.inner.as_webgpu();
     let mapped = webgpu_sys::GpuTexelCopyTextureInfo::new(&texture.inner);
     mapped.set_mip_level(view.mip_level);
     mapped.set_origin(&map_origin_3d(view.origin));
@@ -659,7 +659,7 @@ fn map_texture_copy_view(
 fn map_tagged_texture_copy_view(
     view: wgt::CopyExternalImageDestInfo<&crate::api::Texture>,
 ) -> webgpu_sys::GpuCopyExternalImageDestInfo {
-    let texture = view.texture.inner.as_webgpu();
+    let texture = view.texture.shared.inner.as_webgpu();
     let mapped = webgpu_sys::GpuCopyExternalImageDestInfo::new(&texture.inner);
     mapped.set_mip_level(view.mip_level);
     mapped.set_origin(&map_origin_3d(view.origin));
@@ -1938,7 +1938,7 @@ impl dispatch::DeviceInterface for WebDevice {
                         offset,
                         size,
                     }) => {
-                        let buffer = buffer.inner.as_webgpu();
+                        let buffer = buffer.shared.inner.as_webgpu();
                         let mapped_buffer_binding =
                             webgpu_sys::GpuBufferBinding::new(&buffer.inner);
                         mapped_buffer_binding.set_offset(offset as f64);
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 4becb1e8dd..0943c3af7a 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -364,7 +364,7 @@ impl ContextWgpuCore {
 
 fn map_buffer_copy_view(view: crate::TexelCopyBufferInfo<'_>) -> wgc::command::TexelCopyBufferInfo {
     wgc::command::TexelCopyBufferInfo {
-        buffer: view.buffer.inner.as_core().id,
+        buffer: view.buffer.shared.inner.as_core().id,
         layout: view.layout,
     }
 }
@@ -373,7 +373,7 @@ fn map_texture_copy_view(
     view: crate::TexelCopyTextureInfo<'_>,
 ) -> wgc::command::TexelCopyTextureInfo {
     wgc::command::TexelCopyTextureInfo {
-        texture: view.texture.inner.as_core().id,
+        texture: view.texture.shared.inner.as_core().id,
         mip_level: view.mip_level,
         origin: view.origin,
         aspect: view.aspect,
@@ -388,7 +388,7 @@ fn map_texture_tagged_copy_view(
     view: wgt::CopyExternalImageDestInfo<&api::Texture>,
 ) -> wgc::command::CopyExternalImageDestInfo {
     wgc::command::CopyExternalImageDestInfo {
-        texture: view.texture.inner.as_core().id,
+        texture: view.texture.shared.inner.as_core().id,
         mip_level: view.mip_level,
         origin: view.origin,
         aspect: view.aspect,
@@ -810,12 +810,13 @@ impl dispatch::InstanceInterface for ContextWgpuCore {
             },
         }?;
 
-        Ok(dispatch::DispatchSurface::Core(CoreSurface {
+        Ok(CoreSurface {
             context: self.clone(),
             id,
             configured_device: Mutex::default(),
             error_sink: Mutex::default(),
-        }))
+        }
+        .into())
     }
 
     fn request_adapter(
@@ -1105,7 +1106,7 @@ impl dispatch::DeviceInterface for CoreDevice {
             for entry in desc.entries.iter() {
                 if let BindingResource::BufferArray(array) = entry.resource {
                     arrayed_buffer_bindings.extend(array.iter().map(|binding| bm::BufferBinding {
-                        buffer_id: binding.buffer.inner.as_core().id,
+                        buffer_id: binding.buffer.shared.inner.as_core().id,
                         offset: binding.offset,
                         size: binding.size,
                     }));
@@ -1125,7 +1126,7 @@ impl dispatch::DeviceInterface for CoreDevice {
                         offset,
                         size,
                     }) => bm::BindingResource::Buffer(bm::BufferBinding {
-                        buffer_id: buffer.inner.as_core().id,
+                        buffer_id: buffer.shared.inner.as_core().id,
                         offset,
                         size,
                     }),
@@ -1154,7 +1155,7 @@ impl dispatch::DeviceInterface for CoreDevice {
                     }
                     BindingResource::AccelerationStructure(acceleration_structure) => {
                         bm::BindingResource::AccelerationStructure(
-                            acceleration_structure.inner.as_core().id,
+                            acceleration_structure.shared.inner.as_core().id,
                         )
                     }
                 },
@@ -2437,9 +2438,11 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder {
                 crate::BlasGeometries::TriangleGeometries(ref triangle_geometries) => {
                     let iter = triangle_geometries.iter().map(|tg| {
                         wgc::ray_tracing::BlasTriangleGeometry {
-                            vertex_buffer: tg.vertex_buffer.inner.as_core().id,
-                            index_buffer: tg.index_buffer.map(|buf| buf.inner.as_core().id),
-                            transform_buffer: tg.transform_buffer.map(|buf| buf.inner.as_core().id),
+                            vertex_buffer: tg.vertex_buffer.shared.inner.as_core().id,
+                            index_buffer: tg.index_buffer.map(|buf| buf.shared.inner.as_core().id),
+                            transform_buffer: tg
+                                .transform_buffer
+                                .map(|buf| buf.shared.inner.as_core().id),
                             size: tg.size,
                             transform_buffer_offset: tg.transform_buffer_offset,
                             first_vertex: tg.first_vertex,
@@ -2451,15 +2454,15 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder {
                 }
             };
             wgc::ray_tracing::BlasBuildEntry {
-                blas_id: e.blas.shared.inner.as_core().id,
+                blas_id: e.blas.inner.as_core().id,
                 geometries,
             }
         });
 
         let tlas = tlas.into_iter().map(|e: &crate::TlasBuildEntry<'a>| {
             wgc::ray_tracing::TlasBuildEntry {
-                tlas_id: e.tlas.inner.as_core().id,
-                instance_buffer_id: e.instance_buffer.inner.as_core().id,
+                tlas_id: e.tlas.shared.inner.as_core().id,
+                instance_buffer_id: e.instance_buffer.shared.inner.as_core().id,
                 instance_count: e.instance_count,
             }
         });
@@ -2487,9 +2490,11 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder {
                 crate::BlasGeometries::TriangleGeometries(ref triangle_geometries) => {
                     let iter = triangle_geometries.iter().map(|tg| {
                         wgc::ray_tracing::BlasTriangleGeometry {
-                            vertex_buffer: tg.vertex_buffer.inner.as_core().id,
-                            index_buffer: tg.index_buffer.map(|buf| buf.inner.as_core().id),
-                            transform_buffer: tg.transform_buffer.map(|buf| buf.inner.as_core().id),
+                            vertex_buffer: tg.vertex_buffer.shared.inner.as_core().id,
+                            index_buffer: tg.index_buffer.map(|buf| buf.shared.inner.as_core().id),
+                            transform_buffer: tg
+                                .transform_buffer
+                                .map(|buf| buf.shared.inner.as_core().id),
                             size: tg.size,
                             transform_buffer_offset: tg.transform_buffer_offset,
                             first_vertex: tg.first_vertex,
@@ -2501,7 +2506,7 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder {
                 }
             };
             wgc::ray_tracing::BlasBuildEntry {
-                blas_id: e.blas.shared.inner.as_core().id,
+                blas_id: e.blas.inner.as_core().id,
                 geometries,
             }
         });
@@ -2514,14 +2519,14 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder {
                     instance
                         .as_ref()
                         .map(|instance| wgc::ray_tracing::TlasInstance {
-                            blas_id: instance.blas.inner.as_core().id,
+                            blas_id: instance.blas.as_core().id,
                             transform: &instance.transform,
                             custom_index: instance.custom_index,
                             mask: instance.mask,
                         })
                 });
             wgc::ray_tracing::TlasPackage {
-                tlas_id: e.tlas.inner.as_core().id,
+                tlas_id: e.tlas.shared.inner.as_core().id,
                 instances: Box::new(instances),
                 lowest_unmodified: e.lowest_unmodified,
             }
diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs
index 9d181eba93..cad9c11b23 100644
--- a/wgpu/src/util/mod.rs
+++ b/wgpu/src/util/mod.rs
@@ -101,7 +101,7 @@ impl DownloadBuffer {
     ) {
         let size = match buffer.size {
             Some(size) => size.into(),
-            None => buffer.buffer.map_context.lock().total_size - buffer.offset,
+            None => buffer.buffer.shared.map_context.lock().total_size - buffer.offset,
         };
 
         let download = Arc::new(device.create_buffer(&super::BufferDescriptor {
@@ -126,7 +126,7 @@ impl DownloadBuffer {
                     return;
                 }
 
-                let mapped_range = download.inner.get_mapped_range(0..size);
+                let mapped_range = download.shared.inner.get_mapped_range(0..size);
                 callback(Ok(Self {
                     _gpu_buffer: download,
                     mapped_range,