-
Notifications
You must be signed in to change notification settings - Fork 11.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[authority] Batch crash robustness #714
Changes from all commits
4a0bcd6
71899a5
44cea7f
4353707
601ae06
df9980f
432c98d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
// Copyright (c) 2022, Mysten Labs, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
use super::*; | ||
|
||
use parking_lot::Mutex; | ||
use std::collections::HashMap; | ||
use tokio::sync::mpsc::UnboundedSender; | ||
|
||
pub struct AutoIncSenderInner<T> { | ||
pub next_available_sequence_number: u64, | ||
pub next_expected_sequence_number: u64, | ||
pub sender: UnboundedSender<(u64, T)>, | ||
pub waiting: HashMap<u64, Option<T>>, | ||
} | ||
|
||
impl<T> AutoIncSenderInner<T> { | ||
pub fn send_all_waiting(&mut self) { | ||
while let Some(item_opt) = self.waiting.remove(&self.next_expected_sequence_number) { | ||
if let Some(item) = item_opt { | ||
if let Err(_err) = self.sender.send((self.next_expected_sequence_number, item)) { | ||
/* | ||
An error here indicates the other side of the channel is closed. | ||
There is not very much we can do, as if the batcher is closed we | ||
will write to the DB and the recover when we recover. | ||
*/ | ||
|
||
self.waiting.clear(); | ||
} | ||
} | ||
self.next_expected_sequence_number += 1; | ||
} | ||
} | ||
} | ||
|
||
/* | ||
A wrapper around a channel sender that ensures items sent are associated with | ||
integer tickets and sent in increasing ticket order. When a ticket is dropped | ||
its ticket value is skipped and the subsequent tickets are sent. | ||
|
||
If the receiver end of the channel is closed, the autoinc sender simply drops | ||
all the items sent. | ||
*/ | ||
|
||
#[derive(Clone)] | ||
pub struct AutoIncSender<T>(pub Arc<Mutex<AutoIncSenderInner<T>>>); | ||
|
||
impl<T> AutoIncSender<T> { | ||
// Creates a new auto-incrementing sender | ||
pub fn new(sender: UnboundedSender<(u64, T)>, next_sequence_number: u64) -> AutoIncSender<T> { | ||
AutoIncSender(Arc::new(Mutex::new(AutoIncSenderInner { | ||
// TODO: next_available_sequence_number could be an AtomicU64 instead. | ||
next_available_sequence_number: next_sequence_number, | ||
next_expected_sequence_number: next_sequence_number, | ||
sender, | ||
waiting: HashMap::new(), | ||
}))) | ||
} | ||
|
||
/// Creates a new ticket with the next available sequence number. | ||
pub fn next_ticket(&self) -> Ticket<T> { | ||
let ticket_number = { | ||
// Keep the critical region as small as possible | ||
let mut aic = self.0.lock(); | ||
let ticket_number_inner = aic.next_available_sequence_number; | ||
aic.next_available_sequence_number += 1; | ||
ticket_number_inner | ||
}; | ||
|
||
Ticket { | ||
autoinc_sender: self.0.clone(), | ||
sequence_number: ticket_number, | ||
sent: false, | ||
} | ||
} | ||
} | ||
|
||
/// A ticket represents a slot in the sequence to be sent in the channel | ||
pub struct Ticket<T> { | ||
autoinc_sender: Arc<Mutex<AutoIncSenderInner<T>>>, | ||
sequence_number: u64, | ||
sent: bool, | ||
} | ||
|
||
impl<T> Ticket<T> | ||
where | ||
T: std::fmt::Debug, | ||
{ | ||
/// Send an item at that sequence in the channel. | ||
pub fn send(&mut self, item: T) { | ||
let mut aic = self.autoinc_sender.lock(); | ||
if aic.sender.is_closed() { | ||
// To ensure we do not fill our memory | ||
return; | ||
} | ||
aic.waiting.insert(self.sequence_number, Some(item)); | ||
self.sent = true; | ||
aic.send_all_waiting(); | ||
} | ||
|
||
/// Get the ticket sequence number | ||
pub fn ticket(&self) -> u64 { | ||
self.sequence_number | ||
} | ||
} | ||
|
||
/// A custom drop that indicates that there may not be a item | ||
/// associated with this sequence number, | ||
impl<T> Drop for Ticket<T> { | ||
fn drop(&mut self) { | ||
if !self.sent { | ||
let mut aic = self.autoinc_sender.lock(); | ||
if aic.sender.is_closed() { | ||
return; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to unlock here before returning? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Upon return the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Locks drop when they go out of scope. |
||
} | ||
aic.waiting.insert(self.sequence_number, None); | ||
aic.send_all_waiting(); | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Tickets that are sent out of order, dropped, or lost in a panicking task
    /// must not stall the channel: the remaining items arrive in ticket order.
    #[tokio::test]
    async fn test_ticketing() {
        let (sender, mut receiver) = tokio::sync::mpsc::unbounded_channel();
        let autoinc = AutoIncSender::new(sender, 10);

        let mut first = autoinc.next_ticket();
        let second = autoinc.next_ticket();
        let third = autoinc.next_ticket();
        let mut fourth = autoinc.next_ticket();

        // The last ticket sends before any earlier one is resolved.
        fourth.send(1010);

        // The second ticket is dropped without sending.
        drop(second);

        // The third ticket is lost inside a task that panics.
        let crashed = tokio::spawn(async move {
            let _held = third;
            panic!("Crash here!");
        });

        // Drive the task to completion, i.e. to its panic.
        assert!(crashed.await.is_err());

        // Resolving the first ticket unblocks everything behind it.
        first.send(1040);

        let (first_seq, first_value) = receiver.recv().await.unwrap();
        let (second_seq, second_value) = receiver.recv().await.unwrap();

        assert_eq!((10, 1040), (first_seq, first_value));
        assert_eq!((13, 1010), (second_seq, second_value));
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing we could do is at least emit a log line when the size of this goes past a threshold, and possibly escalate that to a louder warning if it keeps growing without anything being removed.