Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache both head and tail index in both Consumer and Producer (again) #132

Merged
merged 2 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,7 @@ impl<T> Producer<T> {
/// For a safe alternative that provides mutable slices of [`Default`]-initialized slots,
/// see [`Producer::write_chunk()`].
pub fn write_chunk_uninit(&mut self, n: usize) -> Result<WriteChunkUninit<'_, T>, ChunkError> {
// "tail" is only ever written by the producer thread, "Relaxed" is enough
let tail = self.buffer.tail.load(Ordering::Relaxed);
let tail = self.cached_tail.get();

// Check if the queue has *possibly* not enough slots.
if self.buffer.capacity - self.buffer.distance(self.cached_head.get(), tail) < n {
Expand Down Expand Up @@ -285,8 +284,7 @@ impl<T> Consumer<T> {
///
/// See the documentation of the [`chunks`](crate::chunks#examples) module.
pub fn read_chunk(&mut self, n: usize) -> Result<ReadChunk<'_, T>, ChunkError> {
// "head" is only ever written by the consumer thread, "Relaxed" is enough
let head = self.buffer.head.load(Ordering::Relaxed);
let head = self.cached_head.get();

// Check if the queue has *possibly* not enough slots.
if self.buffer.distance(head, self.cached_tail.get()) < n {
Expand Down Expand Up @@ -497,10 +495,9 @@ impl<T> WriteChunkUninit<'_, T> {

unsafe fn commit_unchecked(self, n: usize) -> usize {
let p = self.producer;
// "tail" is only ever written by the producer thread, "Relaxed" is enough
let tail = p.buffer.tail.load(Ordering::Relaxed);
let tail = p.buffer.increment(tail, n);
let tail = p.buffer.increment(p.cached_tail.get(), n);
p.buffer.tail.store(tail, Ordering::Release);
p.cached_tail.set(tail);
n
}

Expand Down Expand Up @@ -744,10 +741,9 @@ impl<T> ReadChunk<'_, T> {
unsafe { self.second_ptr.add(i).drop_in_place() };
}
let c = self.consumer;
// "head" is only ever written by the consumer thread, "Relaxed" is enough
let head = c.buffer.head.load(Ordering::Relaxed);
let head = c.buffer.increment(head, n);
let head = c.buffer.increment(c.cached_head.get(), n);
c.buffer.head.store(head, Ordering::Release);
c.cached_head.set(head);
n
}

Expand Down Expand Up @@ -799,10 +795,9 @@ impl<T> Drop for ReadChunkIntoIter<'_, T> {
/// Non-iterated items remain in the ring buffer and are *not* dropped.
fn drop(&mut self) {
let c = &self.chunk.consumer;
// "head" is only ever written by the consumer thread, "Relaxed" is enough
let head = c.buffer.head.load(Ordering::Relaxed);
let head = c.buffer.increment(head, self.iterated);
let head = c.buffer.increment(c.cached_head.get(), self.iterated);
c.buffer.head.store(head, Ordering::Release);
c.cached_head.set(head);
}
}

Expand Down
32 changes: 22 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,11 @@ impl<T> RingBuffer<T> {
let p = Producer {
buffer: buffer.clone(),
cached_head: Cell::new(0),
cached_tail: Cell::new(0),
};
let c = Consumer {
buffer,
cached_head: Cell::new(0),
cached_tail: Cell::new(0),
};
(p, c)
Expand Down Expand Up @@ -283,6 +285,13 @@ pub struct Producer<T> {
///
/// This value can be stale and sometimes needs to be resynchronized with `buffer.head`.
cached_head: Cell<usize>,

/// A copy of `buffer.tail` for quick access.
///
/// This value is always in sync with `buffer.tail`.
// NB: Caching the tail seems to have little effect on Intel CPUs, but it seems to
// improve performance on AMD CPUs, see https://github.com/mgeier/rtrb/pull/132
cached_tail: Cell<usize>,
}

// SAFETY: After moving a Producer to another thread, there is still only a single thread
Expand Down Expand Up @@ -315,6 +324,7 @@ impl<T> Producer<T> {
unsafe { self.buffer.slot_ptr(tail).write(value) };
let tail = self.buffer.increment1(tail);
self.buffer.tail.store(tail, Ordering::Release);
self.cached_tail.set(tail);
Ok(())
} else {
Err(PushError::Full(value))
Expand Down Expand Up @@ -342,9 +352,7 @@ impl<T> Producer<T> {
pub fn slots(&self) -> usize {
let head = self.buffer.head.load(Ordering::Acquire);
self.cached_head.set(head);
// "tail" is only ever written by the producer thread, "Relaxed" is enough
let tail = self.buffer.tail.load(Ordering::Relaxed);
self.buffer.capacity - self.buffer.distance(head, tail)
self.buffer.capacity - self.buffer.distance(head, self.cached_tail.get())
}

/// Returns `true` if there are currently no slots available for writing.
Expand Down Expand Up @@ -445,8 +453,7 @@ impl<T> Producer<T> {
/// This is a strict subset of the functionality implemented in `write_chunk_uninit()`.
/// For performance, this special case is implemented separately.
fn next_tail(&self) -> Option<usize> {
// "tail" is only ever written by the producer thread, "Relaxed" is enough
let tail = self.buffer.tail.load(Ordering::Relaxed);
let tail = self.cached_tail.get();

// Check if the queue is *possibly* full.
if self.buffer.distance(self.cached_head.get(), tail) == self.buffer.capacity {
Expand Down Expand Up @@ -488,6 +495,13 @@ pub struct Consumer<T> {
/// A reference to the ring buffer.
buffer: Arc<RingBuffer<T>>,

/// A copy of `buffer.head` for quick access.
///
/// This value is always in sync with `buffer.head`.
// NB: Caching the head seems to have little effect on Intel CPUs, but it seems to
// improve performance on AMD CPUs, see https://github.com/mgeier/rtrb/pull/132
cached_head: Cell<usize>,

/// A copy of `buffer.tail` for quick access.
///
/// This value can be stale and sometimes needs to be resynchronized with `buffer.tail`.
Expand Down Expand Up @@ -534,6 +548,7 @@ impl<T> Consumer<T> {
let value = unsafe { self.buffer.slot_ptr(head).read() };
let head = self.buffer.increment1(head);
self.buffer.head.store(head, Ordering::Release);
self.cached_head.set(head);
Ok(value)
} else {
Err(PopError::Empty)
Expand Down Expand Up @@ -588,9 +603,7 @@ impl<T> Consumer<T> {
pub fn slots(&self) -> usize {
let tail = self.buffer.tail.load(Ordering::Acquire);
self.cached_tail.set(tail);
// "head" is only ever written by the consumer thread, "Relaxed" is enough
let head = self.buffer.head.load(Ordering::Relaxed);
self.buffer.distance(head, tail)
self.buffer.distance(self.cached_head.get(), tail)
}

/// Returns `true` if there are currently no slots available for reading.
Expand Down Expand Up @@ -690,8 +703,7 @@ impl<T> Consumer<T> {
/// This is a strict subset of the functionality implemented in `read_chunk()`.
/// For performance, this special case is implemented separately.
fn next_head(&self) -> Option<usize> {
// "head" is only ever written by the consumer thread, "Relaxed" is enough
let head = self.buffer.head.load(Ordering::Relaxed);
let head = self.cached_head.get();

// Check if the queue is *possibly* empty.
if head == self.cached_tail.get() {
Expand Down
Loading