Skip to content

Commit

Permalink
proof of concept for injecting faults into FSM.Apply
Browse files Browse the repository at this point in the history
This changeset is a proof-of-concept for a fault injection interface
into the `FSM.Apply` function. This would allow us to introduce
timeouts or errors in unit testing by adding a LogApplier
implementation to a map of `interceptionAppliers`. This is similar to
how we register LogAppliers for the enterprise FSM functions
currently. Most interception appliers are expected to then call the
normal applier directly.

This was initially developed for #13407, but it can't be used to reproduce
that particular bug. I'm opening this PR anyway for further discussion
about whether this is a worthwhile tool to have for testing in general.
  • Loading branch information
tgross committed Jun 17, 2022
1 parent 5e0964e commit 41c5318
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
28 changes: 19 additions & 9 deletions nomad/fsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type nomadFSM struct {
// enterpriseRestorers holds the set of enterprise only snapshot restorers
enterpriseRestorers SnapshotRestorers

// interceptionAppliers holds a set of test-only LogAppliers
// used to intercept raft messages to inject faults
interceptionAppliers LogAppliers

// stateLock is only used to protect outside callers to State() from
// racing with Restore(), which is called by Raft (it puts in a totally
// new state store). Everything internal here is synchronized by the
Expand Down Expand Up @@ -153,15 +157,16 @@ func NewFSM(config *FSMConfig) (*nomadFSM, error) {
}

fsm := &nomadFSM{
evalBroker: config.EvalBroker,
periodicDispatcher: config.Periodic,
blockedEvals: config.Blocked,
logger: config.Logger.Named("fsm"),
config: config,
state: state,
timetable: NewTimeTable(timeTableGranularity, timeTableLimit),
enterpriseAppliers: make(map[structs.MessageType]LogApplier, 8),
enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8),
evalBroker: config.EvalBroker,
periodicDispatcher: config.Periodic,
blockedEvals: config.Blocked,
logger: config.Logger.Named("fsm"),
config: config,
state: state,
timetable: NewTimeTable(timeTableGranularity, timeTableLimit),
enterpriseAppliers: make(map[structs.MessageType]LogApplier, 8),
enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8),
interceptionAppliers: make(map[structs.MessageType]LogApplier, 8),
}

// Register all the log applier functions
Expand Down Expand Up @@ -207,6 +212,11 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} {
ignoreUnknown = true
}

// Check interception message types.
if applier, ok := n.interceptionAppliers[msgType]; ok {
return applier(buf[1:], log.Index)
}

switch msgType {
case structs.NodeRegisterRequestType:
return n.applyUpsertNode(msgType, buf[1:], log.Index)
Expand Down
13 changes: 13 additions & 0 deletions nomad/plan_endpoint_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package nomad

import (
"fmt"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -140,6 +141,18 @@ func TestPlanEndpoint_ApplyConcurrent(t *testing.T) {
defer cleanupS1()
testutil.WaitForLeader(t, s1.RPC)

planApplyFn := func(buf []byte, index uint64) interface{} {
if index == 8 {
fmt.Println("introducing delay")
time.Sleep(6000 * time.Millisecond)
}
return s1.fsm.applyPlanResults(structs.MsgTypeTestSetup, buf, index)
}

s1.fsm.interceptionAppliers = map[structs.MessageType]LogApplier{
structs.ApplyPlanResultsRequestType: planApplyFn,
}

plans := []*structs.Plan{}

for i := 0; i < 5; i++ {
Expand Down

0 comments on commit 41c5318

Please sign in to comment.