Skip to content

Commit

Permalink
Run job deregistering in a single transaction
Browse files Browse the repository at this point in the history
Fixes #4299

Upon investigating this case further, we determined the issue to be a race between applying `JobBatchDeregisterRequest` fsm operation and processing job-deregister evals.

Processing job-deregister evals should wait until the FSM log message finishes applying, by using the snapshot index.  However, with `JobBatchDeregister`, any single individual job deregistering was applied accidentally incremented the snapshot index and resulted into processing job-deregister evals.  When a Nomad server receives an eval for a job in the batch that is yet to be deleted, we accidentally re-run it depending on the state of allocation.

This change ensures that we delete deregister all of the jobs and inserts all evals in a single transactions, thus blocking processing related evals until deregistering complete.
  • Loading branch information
Mahmood Ali authored and Preetha Appan committed Nov 13, 2018
1 parent 9c74ba7 commit 6eb1078
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 17 deletions.
44 changes: 30 additions & 14 deletions nomad/fsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,14 @@ func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} {
panic(fmt.Errorf("failed to decode request: %v", err))
}

if err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge); err != nil {
n.logger.Printf("[ERR] nomad.fsm: deregistering job failed: %v", err)
return err
}
return n.state.WithWriteTransaction(func(tx state.Txn) error {
if err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge, tx); err != nil {
n.logger.Printf("deregistering job failed:%v", err)
return err
}

return nil
return nil
})
}

func (n *nomadFSM) applyBatchDeregisterJob(buf []byte, index uint64) interface{} {
Expand All @@ -514,38 +516,52 @@ func (n *nomadFSM) applyBatchDeregisterJob(buf []byte, index uint64) interface{}
panic(fmt.Errorf("failed to decode request: %v", err))
}

for jobNS, options := range req.Jobs {
if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge); err != nil {
n.logger.Printf("[ERR] nomad.fsm: deregistering %v failed: %v", jobNS, err)
err := n.state.WithWriteTransaction(func(tx state.Txn) error {
for jobNS, options := range req.Jobs {
if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge, tx); err != nil {
n.logger.Printf("[ERR] deregistering job %v failed: %v", jobNS)
return err
}
}

if err := n.state.UpsertEvalsTxn(index, req.Evals, tx); err != nil {
n.logger.Printf("[ERR] UpsertEvals failed:%v", err)
return err
}

return nil
})

if err != nil {
return err
}

return n.upsertEvals(index, req.Evals)
n.handleUpsertedEvals(req.Evals)
return nil
}

// handleJobDeregister is used to deregister a job.
func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool) error {
func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool, tx state.Txn) error {
// If it is periodic remove it from the dispatcher
if err := n.periodicDispatcher.Remove(namespace, jobID); err != nil {
n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err)
return err
}

if purge {
if err := n.state.DeleteJob(index, namespace, jobID); err != nil {
if err := n.state.DeleteJobTxn(index, namespace, jobID, tx); err != nil {
n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err)
return err
}

// We always delete from the periodic launch table because it is possible that
// the job was updated to be non-periodic, thus checking if it is periodic
// doesn't ensure we clean it up properly.
n.state.DeletePeriodicLaunch(index, namespace, jobID)
n.state.DeletePeriodicLaunchTxn(index, namespace, jobID, tx)
} else {
// Get the current job and mark it as stopped and re-insert it.
ws := memdb.NewWatchSet()
current, err := n.state.JobByID(ws, namespace, jobID)
current, err := n.state.JobByIDTxn(ws, namespace, jobID, tx)
if err != nil {
n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err)
return err
Expand All @@ -558,7 +574,7 @@ func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, pu
stopped := current.Copy()
stopped.Stop = true

if err := n.state.UpsertJob(index, stopped); err != nil {
if err := n.state.UpsertJobTxn(index, stopped, tx); err != nil {
n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
return err
}
Expand Down
47 changes: 44 additions & 3 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)

type Txn = *memdb.Txn

const (
// NodeRegisterEventReregistered is the message used when the node becomes
// reregistered.
Expand Down Expand Up @@ -883,6 +885,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
return nil
}

func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
return s.upsertJobImpl(index, job, false, txn)
}

// upsertJobImpl is the implementation for registering a job or updating a job definition
func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error {
// COMPAT 0.7: Upgrade old objects that do not have namespaces
Expand Down Expand Up @@ -970,6 +976,14 @@ func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
txn := s.db.Txn(true)
defer txn.Abort()

err := s.DeleteJobTxn(index, namespace, jobID, txn)
if err == nil {
txn.Commit()
}
return err
}

func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
// COMPAT 0.7: Upgrade old objects that do not have namespaces
if namespace == "" {
namespace = structs.DefaultNamespace
Expand Down Expand Up @@ -1056,7 +1070,6 @@ func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
return fmt.Errorf("index update failed: %v", err)
}

txn.Commit()
return nil
}

Expand Down Expand Up @@ -1154,7 +1167,10 @@ func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb
// version.
func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
txn := s.db.Txn(false)
return s.JobByIDTxn(ws, namespace, id, txn)
}

func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
// COMPAT 0.7: Upgrade old objects that do not have namespaces
if namespace == "" {
namespace = structs.DefaultNamespace
Expand Down Expand Up @@ -1475,6 +1491,14 @@ func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string)
txn := s.db.Txn(true)
defer txn.Abort()

err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn)
if err == nil {
txn.Commit()
}
return err
}

func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error {
// COMPAT 0.7: Upgrade old objects that do not have namespaces
if namespace == "" {
namespace = structs.DefaultNamespace
Expand All @@ -1497,7 +1521,6 @@ func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string)
return fmt.Errorf("index update failed: %v", err)
}

txn.Commit()
return nil
}

Expand Down Expand Up @@ -1544,6 +1567,14 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro
txn := s.db.Txn(true)
defer txn.Abort()

err := s.UpsertEvalsTxn(index, evals, txn)
if err == nil {
txn.Commit()
}
return err
}

func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
// Do a nested upsert
jobs := make(map[structs.NamespacedID]string, len(evals))
for _, eval := range evals {
Expand All @@ -1563,7 +1594,6 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro
return fmt.Errorf("setting job status failed: %v", err)
}

txn.Commit()
return nil
}

Expand Down Expand Up @@ -3843,6 +3873,17 @@ func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs
return nil
}

func (s *StateStore) WithWriteTransaction(fn func(Txn) error) error {
tx := s.db.Txn(true)
defer tx.Abort()

err := fn(tx)
if err == nil {
tx.Commit()
}
return err
}

// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
StateStore
Expand Down

0 comments on commit 6eb1078

Please sign in to comment.