Skip to content

Commit

Permalink
Merge pull request #61 from seatgeek/drain-with-benefits
Browse files Browse the repository at this point in the history
node drain with benefits
  • Loading branch information
burdandrei authored Feb 18, 2021
2 parents e6b0379 + 366a8f4 commit 5588052
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 10 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,18 @@ OPTIONS:
--detach Return immediately instead of entering monitor mode
--ignore-system Ignore system allows the drain to complete without stopping system job allocations. By default system jobs are stopped last.
--keep-ineligible Keep ineligible will maintain the node's scheduling ineligibility even if the drain is being disabled. This is useful when an existing drain is being cancelled but additional scheduling on the node is not desired.
--with-benefits Instead of flipping the regular drain flag it will make instance ineligible and will add a desired constraint to the task groups found on the node
--constraint
--operand
--value
--wait-for-pending will wait for all the moved jobs to reach running state
```

#### Examples

- `nomad-helper node drain --enable`
- `nomad-helper node --filter-class wrecker --filter-meta 'aws.ami-version=2.0.0-alpha14' --filter-meta 'aws.instance.availability-zone=us-east-1e' drain --noop --enable`

- `node --filter-meta "aws.ami-version=1.9.6" drain --enable --with-benefits --constraint meta.aws.ami-version --operand '=' --value 1.9.8 --wait-for-pending`
### eligibility

Filtering options can be found in the main `node` command help above
Expand Down
4 changes: 4 additions & 0 deletions command/job/move.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ func Move(c *cli.Context, logger *log.Logger) error {
return
}
_, _, err = nomadClient.Jobs().Register(job, nil)
if err != nil {
log.Errorf("failed to update job %s: %s", jobName, err)
return
}
log.Infof("Job %s was successfully moved!", name)
}(jobName)
}
Expand Down
134 changes: 131 additions & 3 deletions command/node/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@ import (
"context"
"fmt"
"sync"
"time"

"github.com/hashicorp/nomad/api"
nomadStructs "github.com/hashicorp/nomad/nomad/structs"
"github.com/seatgeek/nomad-helper/helpers"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli"
"github.com/urfave/cli"
)

func Drain(c *cli.Context, logger *log.Logger) error {
// Check that enable or disable is not set with monitor
if c.Bool("monitor") && (c.Bool("enable") || c.Bool("disable")) {
return fmt.Errorf("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
return fmt.Errorf("-monitor flag cannot be used with the '-enable' or '-disable' flags")
}

// Check that we got either enable or disable, but not both.
Expand All @@ -29,6 +31,19 @@ func Drain(c *cli.Context, logger *log.Logger) error {
if c.Bool("force") && c.Bool("no-deadline") {
return fmt.Errorf("-force and -no-deadline are mutually exclusive")
}
newConstraint := &api.Constraint{}
if c.Bool("with-benefits") {
if c.String("constraint") == "" {
return fmt.Errorf("with-benefits selected, must provide new constrain name")
}
if c.String("operand") == "" {
return fmt.Errorf("with-benefits selected, must provide new constrain name")
}
if c.String("value") == "" {
return fmt.Errorf("with-benefits selected, must provide new constrain name")
}
newConstraint = api.NewConstraint(fmt.Sprintf("${%s}", c.String("constraint")), c.String("operand"), c.String("value"))
}

deadline := c.Duration("deadline")
if c.Bool("force") {
Expand All @@ -54,14 +69,127 @@ func Drain(c *cli.Context, logger *log.Logger) error {
}

if len(matches) == 0 {
return fmt.Errorf("Could not find any nodes matching provided filters")
return fmt.Errorf("could not find any nodes matching provided filters")
}

var wg sync.WaitGroup
ctx := context.Background()

for _, node := range matches {
log.Infof("Node %s (class: %s / version: %s)", node.Name, node.NodeClass, node.Attributes["nomad.version"])
if c.Bool("with-benefits") {
log.Infof("Drain mode with benefits selected, marking node as ineligible and starting to move the jobs to the specified constraint")
_, err := nomadClient.Nodes().ToggleEligibility(node.ID, false, nil)
if err != nil {
log.Errorf("Error updating scheduling eligibility for %s: %s", node.Name, err)
continue
}
// Bring the allocations running on the node
nodeAllocations, _, err := nomadClient.Nodes().Allocations(node.ID, nil)
if err != nil {
log.Errorf("Error updating scheduling eligibility for %s: %s", node.Name, err)
continue
}

for _, nodeAllocation := range nodeAllocations {
if *nodeAllocation.Job.Type != nomadStructs.JobTypeService {
log.Infof("Skipping %s because it's not a service job", nodeAllocation.JobID)
continue
}

if nodeAllocation.ClientStatus == nomadStructs.AllocClientStatusComplete || nodeAllocation.DesiredStatus == nomadStructs.AllocDesiredStatusStop {
log.Infof("Skipping %s because it's already complete", nodeAllocation.JobID)
continue
}

log.Infof("Found Allocation %s, for job %s, moving it", nodeAllocation.ID, nodeAllocation.JobID)

allocationJob := nodeAllocation.Job
existingConstraintAppended := false
for taskGroupIndex, taskGroup := range allocationJob.TaskGroups {
if *taskGroup.Name == nodeAllocation.TaskGroup {
for constraintIndex, constraint := range taskGroup.Constraints {
if constraint.LTarget == newConstraint.LTarget {
allocationJob.TaskGroups[taskGroupIndex].Constraints[constraintIndex] = newConstraint
existingConstraintAppended = true
}
}
if !existingConstraintAppended {
allocationJob.TaskGroups[taskGroupIndex].Constrain(newConstraint)
}
}
}
registerResponse, _, err := nomadClient.Jobs().Register(allocationJob, nil)
if err != nil {
return fmt.Errorf("failed to move taskgroup %s for job %s: %s", nodeAllocation.TaskGroup, nodeAllocation.JobID, err)
}
if c.Bool("wait-for-pending") {
log.Infof("Waiting for successfully placing the moved job")
// wait for the evaluation to be available
for {
time.Sleep(1 * time.Second)
evaluation, _, err := nomadClient.Evaluations().Info(registerResponse.EvalID, nil)
if err != nil {
continue
}

if evaluation.Status == nomadStructs.EvalStatusCancelled || evaluation.Status == nomadStructs.EvalStatusFailed {
logger.Errorf("Could not evaluate the job: %s", evaluation.StatusDescription)
continue
}

if evaluation.Status == nomadStructs.EvalStatusComplete {
log.Infof("Evaluation %s for job %s completed", evaluation.ID, nodeAllocation.JobID)
break
}
}

// wait for the allocations to be available
for {
time.Sleep(1 * time.Second)
evaluations, _, err := nomadClient.Evaluations().List(nil)
if err != nil {
continue
}
for _, evaluation := range evaluations {
if &evaluation.JobID == allocationJob.ID && evaluation.Status == nomadStructs.EvalStatusBlocked {
log.Infof("Job %s got blocked evaluations", allocationJob.ID)
continue
}
}

break
}

// waiting for allocation to be placed
for {
pendingAllocations := 0
allocations, _, err := nomadClient.Allocations().List(nil)
if err != nil {
continue
}
for _, allocation := range allocations {
if allocation.JobID == *allocationJob.ID {
for _, ts := range allocation.TaskStates {
if ts.State == nomadStructs.TaskStatePending {
pendingAllocations++
log.Infof("Allocation %s for job %s is pending, waiting for this to be resolved", allocation.ID, *allocationJob.ID)
}
}
}
}
if pendingAllocations > 0 {
continue
}
break
}

log.Infof("All allocations for job %s are not pending anmore", *allocationJob.ID)
}
log.Infof("Job %s was successfully moved!", *allocationJob.ID)
}
continue
}

// in monitor mode we don't do any change to node state
if c.Bool("monitor") {
Expand Down
27 changes: 25 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,50 @@ go 1.15
require (
github.com/Jeffail/tunny v0.0.0-20210126202424-1b37d6cb867a
github.com/alecthomas/chroma v0.8.2
github.com/armon/go-metrics v0.3.6 // indirect
github.com/buildkite/terminal-to-html v3.2.0+incompatible
github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
github.com/dlclark/regexp2 v1.4.0 // indirect
github.com/fatih/color v1.10.0 // indirect
github.com/golang/protobuf v1.4.3 // indirect
github.com/gorilla/mux v1.8.0
github.com/hashicorp/consul/api v1.8.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-hclog v0.15.0 // indirect
github.com/hashicorp/go-memdb v1.3.1 // indirect
github.com/hashicorp/go-plugin v1.4.0 // indirect
github.com/hashicorp/go-version v1.2.1 // indirect
github.com/hashicorp/nomad v1.0.3
github.com/hashicorp/nomad/api v0.0.0-20210212165259-def4d61617ae
github.com/hashicorp/nomad/api v0.0.0-20210216144318-cb4443d0dc72
github.com/hashicorp/raft v1.2.0 // indirect
github.com/hashicorp/yamux v0.0.0-20200609203250-aecfd211c9ce // indirect
github.com/karlseguin/ccache v2.0.3+incompatible
github.com/karlseguin/expect v1.0.1 // indirect
github.com/magefile/mage v1.11.0 // indirect
github.com/mattn/go-runewidth v0.0.10 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
github.com/mitchellh/copystructure v1.1.1 // indirect
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
github.com/mitchellh/hashstructure v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.4.1 // indirect
github.com/oklog/run v1.1.0 // indirect
github.com/olekukonko/tablewriter v0.0.5
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/schollz/progressbar/v2 v2.15.0
github.com/sirupsen/logrus v1.7.0
github.com/sirupsen/logrus v1.7.1
github.com/stretchr/testify v1.7.0 // indirect
github.com/urfave/cli v1.22.5
github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 // indirect
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad // indirect
golang.org/x/net v0.0.0-20210119194325-5f4716e94777 // indirect
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c // indirect
golang.org/x/text v0.3.5 // indirect
google.golang.org/genproto v0.0.0-20210212180131-e7f2df4ecc2d // indirect
google.golang.org/grpc v1.35.0 // indirect
gopkg.in/karlseguin/expect.v1 v1.0.1 // indirect
gopkg.in/workanator/go-ataman.v1 v1.0.0-20201223053604-e3b73d2e8108
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)
Loading

0 comments on commit 5588052

Please sign in to comment.