From 30a73a328b3375c19c9e1b582d5d128f175fbfdd Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 7 Aug 2024 18:15:43 +0200 Subject: [PATCH 1/5] call `flush_mapped_ranges` when unmapping write-mapped buffers I'm not sure how things worked without this. --- wgpu-core/src/device/global.rs | 6 ++++-- wgpu-core/src/device/life.rs | 11 +++++++---- wgpu-core/src/device/mod.rs | 6 +++--- wgpu-core/src/device/resource.rs | 9 ++++++--- wgpu-core/src/resource.rs | 15 +++++++++++---- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 0aab087ad4..1f70ee09ed 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2418,7 +2418,9 @@ impl Global { Ok((ptr, range_size)) } resource::BufferMapState::Active { - ref ptr, ref range, .. + ref mapping, + ref range, + .. } => { if offset < range.start { return Err(BufferAccessError::OutOfBoundsUnderrun { @@ -2437,7 +2439,7 @@ impl Global { let relative_offset = (offset - range.start) as isize; unsafe { Ok(( - NonNull::new_unchecked(ptr.as_ptr().offset(relative_offset)), + NonNull::new_unchecked(mapping.ptr.as_ptr().offset(relative_offset)), range_size, )) } diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 1bb687d7e2..7408c184dc 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -391,10 +391,10 @@ impl LifetimeTracker { host, snatch_guard, ) { - Ok(ptr) => { + Ok(mapping) => { *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr, - range: pending_mapping.range.start..pending_mapping.range.start + size, + mapping, + range: pending_mapping.range.clone(), host, }; Ok(()) @@ -406,7 +406,10 @@ impl LifetimeTracker { } } else { *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr: std::ptr::NonNull::dangling(), + mapping: hal::BufferMapping { + ptr: std::ptr::NonNull::dangling(), + is_coherent: true, + }, range: pending_mapping.range, host: pending_mapping.op.host, }; diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index d33de22dac..693d8f8c8b 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -18,7 +18,7 @@ use std::os::raw::c_char; use thiserror::Error; use wgt::{BufferAddress, DeviceLostReason, TextureFormat}; -use std::{iter, num::NonZeroU32, ptr}; +use std::{iter, num::NonZeroU32}; pub mod any_device; pub(crate) mod bgl; @@ -307,7 +307,7 @@ fn map_buffer( size: BufferAddress, kind: HostMap, snatch_guard: &SnatchGuard, -) -> Result, BufferAccessError> { +) -> Result { let raw_buffer = buffer.try_raw(snatch_guard)?; let mapping = unsafe { raw.map_buffer(raw_buffer, offset..offset + size) @@ -360,7 +360,7 @@ fn map_buffer( } } - Ok(mapping.ptr) + Ok(mapping) } #[derive(Clone, Debug)] diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 621149404f..96b4e9800f 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -611,8 +611,11 @@ impl Device { } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { // buffer is mappable, so we are just doing that at start let map_size = buffer.size; - let ptr = if map_size == 0 { - std::ptr::NonNull::dangling() + let mapping = if map_size == 0 { + hal::BufferMapping { + ptr: std::ptr::NonNull::dangling(), + is_coherent: true, + } } else { let snatch_guard: SnatchGuard = self.snatchable_lock.read(); map_buffer( @@ -625,7 +628,7 @@ impl Device { )? }; *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr, + mapping, range: 0..map_size, host: HostMap::Write, }; diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 54f2b114ec..0b9a12125a 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -229,7 +229,7 @@ pub(crate) enum BufferMapState { Waiting(BufferPendingMapping), /// Mapped Active { - ptr: NonNull, + mapping: hal::BufferMapping, range: hal::MemoryRange, host: HostMap, }, @@ -669,13 +669,18 @@ impl Buffer { BufferMapState::Waiting(pending) => { return Ok(Some((pending.op, Err(BufferAccessError::MapAborted)))); } - BufferMapState::Active { ptr, range, host } => { + BufferMapState::Active { + mapping, + range, + host, + } => { + #[allow(clippy::collapsible_if)] if host == HostMap::Write { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let size = range.end - range.start; let data = trace.make_binary("bin", unsafe { - std::slice::from_raw_parts(ptr.as_ptr(), size as usize) + std::slice::from_raw_parts(mapping.ptr.as_ptr(), size as usize) }); trace.add(trace::Action::WriteBuffer { id: buffer_id, @@ -684,7 +689,9 @@ impl Buffer { queued: false, }); } - let _ = (ptr, range); + if !mapping.is_coherent { + unsafe { device.raw().flush_mapped_ranges(raw_buf, iter::once(range)) }; + } } unsafe { device.raw().unmap_buffer(raw_buf) }; } From 03c76618f064fdac4257b7409ad225c53e1d1fcd Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:42:43 +0200 Subject: [PATCH 2/5] remove `Buffer.sync_mapped_writes` `zero_init_needs_flush_now` was always equal to `mapping.is_coherent` which is not correct but is fixed by the next commit. --- wgpu-core/src/device/mod.rs | 16 +++++----------- wgpu-core/src/device/resource.rs | 2 -- wgpu-core/src/lock/rank.rs | 1 - wgpu-core/src/resource.rs | 1 - 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 693d8f8c8b..539dfdb3d2 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -314,14 +314,11 @@ fn map_buffer( .map_err(DeviceError::from)? }; - *buffer.sync_mapped_writes.lock() = match kind { - HostMap::Read if !mapping.is_coherent => unsafe { + if !mapping.is_coherent && kind == HostMap::Read { + unsafe { raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size)); - None - }, - HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), - _ => None, - }; + } + } assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); @@ -339,9 +336,6 @@ fn map_buffer( // If this is a write mapping zeroing out the memory here is the only // reasonable way as all data is pushed to GPU anyways. - // No need to flush if it is flushed later anyways. - let zero_init_needs_flush_now = - mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none(); let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; for uninitialized in buffer @@ -355,7 +349,7 @@ fn map_buffer( (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; mapped[fill_range].fill(0); - if zero_init_needs_flush_now { + if mapping.is_coherent { unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) }; } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 96b4e9800f..e3abccd886 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -597,7 +597,6 @@ impl Device { rank::BUFFER_INITIALIZATION_STATUS, BufferInitTracker::new(aligned_size), ), - sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), label: desc.label.to_string(), tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()), @@ -697,7 +696,6 @@ impl Device { rank::BUFFER_INITIALIZATION_STATUS, BufferInitTracker::new(0), ), - sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), label: desc.label.to_string(), tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()), diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 5e9bd37193..2539ffe16d 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -120,7 +120,6 @@ define_lock_ranks! { rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { } rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { } - rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { } rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { } rank DEVICE_FENCE "Device::fence" followed by { } #[allow(dead_code)] diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 0b9a12125a..b9d35a6012 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -431,7 +431,6 @@ pub struct Buffer { pub(crate) usage: wgt::BufferUsages, pub(crate) size: wgt::BufferAddress, pub(crate) initialization_status: RwLock, - pub(crate) sync_mapped_writes: Mutex>, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, From 7b7c26594ec6c4acc799a40c2e52149bbce5b176 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 7 Aug 2024 18:13:42 +0200 Subject: [PATCH 3/5] fix check for `flush_mapped_ranges` in `map_buffer` `flush_mapped_ranges` needs to be called when mappings are not coherent. We can also omit flushing for write-mapped buffers since we always flush them on unmap. --- wgpu-core/src/device/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 539dfdb3d2..1f890de902 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -349,7 +349,7 @@ fn map_buffer( (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; mapped[fill_range].fill(0); - if mapping.is_coherent { + if !mapping.is_coherent && kind == HostMap::Read { unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) }; } } From c1d576c5fefad173efb52335d473d6b2fc5d9de9 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:29:50 +0200 Subject: [PATCH 4/5] [gl] fix usage of `glFlushMappedBufferRange` `offset` is relative to the start of the mapping not the start of the buffer. --- wgpu-hal/src/gles/device.rs | 7 ++++++- wgpu-hal/src/gles/mod.rs | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 77c08c8ce0..3e2e308259 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -536,6 +536,7 @@ impl crate::Device for super::Device { size: desc.size, map_flags: 0, data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))), + offset_of_current_mapping: Arc::new(Mutex::new(0)), }); } @@ -635,6 +636,7 @@ impl crate::Device for super::Device { size: desc.size, map_flags, data, + offset_of_current_mapping: Arc::new(Mutex::new(0)), }) } @@ -668,6 +670,7 @@ impl crate::Device for super::Device { unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; slice.as_mut_ptr() } else { + *buffer.offset_of_current_mapping.lock().unwrap() = range.start; unsafe { gl.map_buffer_range( buffer.target, @@ -693,6 +696,7 @@ impl crate::Device for super::Device { unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; unsafe { gl.unmap_buffer(buffer.target) }; unsafe { gl.bind_buffer(buffer.target, None) }; + *buffer.offset_of_current_mapping.lock().unwrap() = 0; } } } @@ -704,10 +708,11 @@ impl crate::Device for super::Device { let gl = &self.shared.context.lock(); unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; for range in ranges { + let offset_of_current_mapping = *buffer.offset_of_current_mapping.lock().unwrap(); unsafe { gl.flush_mapped_buffer_range( buffer.target, - range.start as i32, + (range.start - offset_of_current_mapping) as i32, (range.end - range.start) as i32, ) }; diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 73915d53e2..459600df7e 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -299,6 +299,7 @@ pub struct Buffer { size: wgt::BufferAddress, map_flags: u32, data: Option>>>, + offset_of_current_mapping: Arc>, } #[cfg(send_sync)] From 6b4fea1364c575e0aca02a8b6002700666997a07 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 8 Aug 2024 18:17:04 +0200 Subject: [PATCH 5/5] [gl] gate usage of `glFlushMappedBufferRange` This is done in the same way as in `map_buffer` & `unmap_buffer`. --- wgpu-hal/src/gles/device.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 3e2e308259..c651da6828 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -705,17 +705,20 @@ impl crate::Device for super::Device { I: Iterator, { if let Some(raw) = buffer.raw { - let gl = &self.shared.context.lock(); - unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; - for range in ranges { - let offset_of_current_mapping = *buffer.offset_of_current_mapping.lock().unwrap(); - unsafe { - gl.flush_mapped_buffer_range( - buffer.target, - (range.start - offset_of_current_mapping) as i32, - (range.end - range.start) as i32, - ) - }; + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + let offset_of_current_mapping = + *buffer.offset_of_current_mapping.lock().unwrap(); + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + (range.start - offset_of_current_mapping) as i32, + (range.end - range.start) as i32, + ) + }; + } } } }