Skip to content

Commit

Permalink
Fix bug where election timeout task was not being properly cleaned-up.
Browse files Browse the repository at this point in the history
This resulted in a scenario where:
- a follower node gets elected.
- it fails to fully clean-up its timeout task (cancelles it but doesn't
`.take()` the Option's value).
- next time the node becomes a follower, it fails to spawn a new
election timeout task.

Closes #41
  • Loading branch information
thedodd committed Feb 13, 2020
1 parent fd0e34c commit 52cc48e
Showing 1 changed file with 12 additions and 13 deletions.
25 changes: 12 additions & 13 deletions src/raft/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,7 @@ impl<D: AppData, R: AppDataResponse, E: AppError, N: RaftNetwork<D>, S: RaftStor
self.cleanup_state(ctx);

// Ensure there is no election timeout.
self.election_timeout_stamp = None;
if let Some(handle) = self.election_timeout.take() {
ctx.cancel_future(handle);
}
self.cancel_election_timeout(ctx);

// Perform the transition.
self.state = RaftState::NonVoter;
Expand All @@ -204,9 +201,7 @@ impl<D: AppData, R: AppDataResponse, E: AppError, N: RaftNetwork<D>, S: RaftStor
self.cleanup_state(ctx);

// Ensure we have an election timeout loop running.
if self.election_timeout.is_none() {
self.update_election_timeout(ctx);
}
self.update_election_timeout(ctx); // Cleans-up any old timeout task, and spawns a new one.

// Perform the transition.
self.state = RaftState::Follower(FollowerState::default());
Expand Down Expand Up @@ -291,9 +286,7 @@ impl<D: AppData, R: AppDataResponse, E: AppError, N: RaftNetwork<D>, S: RaftStor
fn become_leader(&mut self, ctx: &mut Context<Self>) {
// Cleanup previous state & ensure we've cancelled the election timeout system.
self.cleanup_state(ctx);
if let Some(handle) = self.election_timeout {
ctx.cancel_future(handle);
}
self.cancel_election_timeout(ctx);

// Prep new leader state.
let (client_request_queue, client_request_receiver) = mpsc::unbounded();
Expand Down Expand Up @@ -540,9 +533,7 @@ impl<D: AppData, R: AppDataResponse, E: AppError, N: RaftNetwork<D>, S: RaftStor
}

// Cancel any current election timeout before spawning a new one.
if let Some(handle) = self.election_timeout.take() {
ctx.cancel_future(handle);
}
self.cancel_election_timeout(ctx);

let timeout = Duration::from_millis(self.config.election_timeout_millis);
self.election_timeout_stamp = Some(Instant::now() + timeout.clone());
Expand All @@ -560,6 +551,14 @@ impl<D: AppData, R: AppDataResponse, E: AppError, N: RaftNetwork<D>, S: RaftStor
self.election_timeout_stamp = Some(Instant::now() + Duration::from_millis(self.config.election_timeout_millis));
}

/// Cancel the current election timeout task if present & clean-up the election timeout stamp.
fn cancel_election_timeout(&mut self, ctx: &mut Context<Self>) {
self.election_timeout_stamp = None;
if let Some(handle) = self.election_timeout.take() {
ctx.cancel_future(handle);
}
}

/// Update the node's current membership config.
///
/// NOTE WELL: if a leader is stepping down, it should not call this method, as it will cause
Expand Down

0 comments on commit 52cc48e

Please sign in to comment.