Skip to content

Commit

Permalink
allocs: Add nomad alloc stop
Browse files Browse the repository at this point in the history
This adds a `nomad alloc stop` command that can be used to stop and
force migrate an allocation to a different node.

This is built on top of the AllocUpdateDesiredTransitionRequest and
explicitly limits the scope of access to that transition to expose it
under the alloc-lifecycle ACL.

The API returns the follow-up eval, which can be used for monitoring in
the CLI or parsed and used by an external tool.
  • Loading branch information
endocrimes committed Apr 3, 2019
1 parent e613da5 commit 35a2c90
Show file tree
Hide file tree
Showing 11 changed files with 463 additions and 3 deletions.
14 changes: 14 additions & 0 deletions api/allocations.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ func (a *Allocations) GC(alloc *Allocation, q *QueryOptions) error {
return err
}

// Stop issues a PUT to /v1/allocation/<alloc.ID>/stop and decodes the reply
// into an AllocStopResponse. The response pointer is always non-nil; callers
// must check the returned error before trusting its contents.
func (a *Allocations) Stop(alloc *Allocation, q *QueryOptions) (*AllocStopResponse, error) {
	stopPath := "/v1/allocation/" + alloc.ID + "/stop"

	out := new(AllocStopResponse)
	_, err := a.client.putQuery(stopPath, nil, out, q)
	return out, err
}

// AllocStopResponse is the response to an `AllocStopRequest`.
type AllocStopResponse struct {
// EvalID is the ID of the follow-up evaluation for the rescheduled alloc.
EvalID string

// WriteMeta is embedded into the response.
// NOTE(review): presumably carries metadata about the write — confirm
// against the WriteMeta definition.
WriteMeta
}

// Allocation is used for serialization of allocations.
type Allocation struct {
ID string
Expand Down
40 changes: 38 additions & 2 deletions command/agent/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,29 @@ func (s *HTTPServer) AllocsRequest(resp http.ResponseWriter, req *http.Request)
}

// AllocSpecificRequest routes requests under /v1/allocation/.
//
// The path suffix is expected to be either "<alloc-id>", which is handled by
// allocGet, or "<alloc-id>/<action>". The only action recognised here is
// "stop", handled by allocStop. Any other shape or action yields a 404.
func (s *HTTPServer) AllocSpecificRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/allocation/")

	// Tokenize the suffix of the path to get the alloc id and find the action
	// invoked on the alloc id. strings.Split with a non-empty separator always
	// returns at least one element, so only the upper bound needs checking.
	tokens := strings.Split(reqSuffix, "/")
	if len(tokens) > 2 {
		return nil, CodedError(404, resourceNotFoundErr)
	}
	allocID := tokens[0]

	if len(tokens) == 1 {
		return s.allocGet(allocID, resp, req)
	}

	switch tokens[1] {
	case "stop":
		return s.allocStop(allocID, resp, req)
	}

	return nil, CodedError(404, resourceNotFoundErr)
}

func (s *HTTPServer) allocGet(allocID string, resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "GET" {
return nil, CodedError(405, ErrInvalidMethod)
}
Expand Down Expand Up @@ -78,8 +100,22 @@ func (s *HTTPServer) AllocSpecificRequest(resp http.ResponseWriter, req *http.Re
return alloc, nil
}

func (s *HTTPServer) ClientAllocRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// allocStop handles POST/PUT requests to stop the allocation identified by
// allocID. It forwards an AllocStopRequest over the "Alloc.Stop" RPC and
// returns the RPC's response together with any RPC error. Other HTTP methods
// are rejected with a 405.
func (s *HTTPServer) allocStop(allocID string, resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	switch req.Method {
	case "POST", "PUT":
		// allowed
	default:
		return nil, CodedError(405, ErrInvalidMethod)
	}

	stopReq := &structs.AllocStopRequest{AllocID: allocID}
	s.parseWriteRequest(req, &stopReq.WriteRequest)

	reply := new(structs.AllocStopResponse)
	// NOTE(review): stopReq is already a pointer, so this passes a
	// pointer-to-pointer to the RPC layer; kept exactly as the original did.
	err := s.agent.RPC("Alloc.Stop", &stopReq, reply)
	return reply, err
}

func (s *HTTPServer) ClientAllocRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/client/allocation/")

// tokenize the suffix of the path to get the alloc id and find the action
Expand Down
26 changes: 26 additions & 0 deletions command/agent/alloc_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,32 @@ func TestHTTP_AllocQuery_Payload(t *testing.T) {
})
}

// TestHTTP_AllocStop verifies that a POST to /v1/allocation/<id>/stop is
// routed through AllocSpecificRequest and yields a response carrying both an
// evaluation ID and an index.
func TestHTTP_AllocStop(t *testing.T) {
	t.Parallel()
	httpTest(t, nil, func(s *TestAgent) {
		require := require.New(t)

		// Seed the server state directly with a mock allocation and its
		// job summary.
		alloc := mock.Alloc()
		state := s.Agent.server.State()
		require.NoError(state.UpsertJobSummary(999, mock.JobSummary(alloc.JobID)))
		require.NoError(state.UpsertAllocs(1000, []*structs.Allocation{alloc}))

		// Build the HTTP request against the stop endpoint.
		stopURL := "/v1/allocation/" + alloc.ID + "/stop"
		req, err := http.NewRequest("POST", stopURL, nil)
		require.NoError(err)
		recorder := httptest.NewRecorder()

		// Dispatch it through the handler under test.
		obj, err := s.Server.AllocSpecificRequest(recorder, req)
		require.NoError(err)

		stopResp := obj.(*structs.AllocStopResponse)
		require.NotEmpty(stopResp.EvalID, "missing eval")
		require.NotEmpty(stopResp.Index, "missing index")
	})
}

func TestHTTP_AllocStats(t *testing.T) {
t.Parallel()
require := require.New(t)
Expand Down
128 changes: 128 additions & 0 deletions command/alloc_stop.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package command

import (
"fmt"
"strings"
)

// AllocStopCommand implements the `nomad alloc stop` CLI command. It embeds
// Meta, which supplies the Ui, flag set and API client used by Run.
type AllocStopCommand struct {
Meta
}

// Help returns the long-form usage text for `nomad alloc stop`, with the
// general options usage spliced in and surrounding whitespace trimmed.
func (c *AllocStopCommand) Help() string {
	helpText := `
Usage: nomad alloc stop [options] <allocation>
Alias: nomad stop
stop an existing allocation. This command is used to signal a specific alloc
to shut down. When the allocation has been shut down, it will then be
rescheduled. An interactive monitoring session will display log lines as the
allocation completes shutting down. It is safe to exit the monitor early with
ctrl-c.
General Options:
` + generalOptionsUsage() + `
Stop Specific Options:
-detach
Return immediately instead of entering monitor mode. After the
stop command is submitted, a new evaluation ID is printed to the
screen, which can be used to examine the rescheduling evaluation using the
eval-status command.
-verbose
Show full information.
`
	return strings.TrimSpace(helpText)
}

// Name returns the command's name, "alloc stop".
func (c *AllocStopCommand) Name() string {
	return "alloc stop"
}

// Run executes the command with the given CLI arguments.
//
// It parses the -detach and -verbose flags, resolves the single positional
// argument as an allocation ID or prefix, and asks the API to stop the
// matching allocation. With -detach the resulting evaluation ID is printed
// and 0 is returned; otherwise the evaluation is monitored and the monitor's
// exit code is returned. Any failure is reported on the error UI and
// yields 1.
func (c *AllocStopCommand) Run(args []string) int {
	var detach, verbose bool

	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")
	flags.BoolVar(&verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one alloc
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error("This command takes one argument: <alloc-id>")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	allocID := args[0]

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Reject identifiers shorter than two characters. This includes the empty
	// string, which would otherwise be used as a prefix for the lookup below.
	if len(allocID) < 2 {
		c.Ui.Error("Alloc ID must contain at least two characters.")
		return 1
	}

	allocID = sanitizeUUIDPrefix(allocID)

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Resolve the (possibly partial) ID to exactly one allocation
	allocs, _, err := client.Allocations().PrefixList(allocID)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
		return 1
	}

	if len(allocs) == 0 {
		c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
		return 1
	}

	if len(allocs) > 1 {
		// Format the allocs
		out := formatAllocListStubs(allocs, verbose, length)
		c.Ui.Error(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out))
		return 1
	}

	// Prefix lookup matched a single allocation
	alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
		return 1
	}

	resp, err := client.Allocations().Stop(alloc, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error stopping allocation: %s", err))
		return 1
	}

	// In detached mode just print the follow-up evaluation ID and exit.
	if detach {
		c.Ui.Output(resp.EvalID)
		return 0
	}

	mon := newMonitor(c.Ui, client, length)
	return mon.monitor(resp.EvalID, false)
}

// Synopsis returns the one-line description of the command.
func (c *AllocStopCommand) Synopsis() string {
	return "Stop and reschedule a running allocation"
}
112 changes: 112 additions & 0 deletions command/alloc_stop_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package command

import (
"fmt"
"testing"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
)

// TestAllocStopCommand_Implements is a compile-time check that
// *AllocStopCommand satisfies cli.Command.
func TestAllocStopCommand_Implements(t *testing.T) {
	t.Parallel()
	var _ cli.Command = (*AllocStopCommand)(nil)
}

// TestAllocStop_Fails exercises the error paths of AllocStopCommand.Run:
// wrong argument count, an unreachable address, an unknown allocation, a
// too-short identifier, and an odd-length prefix that matches nothing.
func TestAllocStop_Fails(t *testing.T) {
	srv, _, url := testServer(t, false, nil)
	defer srv.Shutdown()

	require := require.New(t)
	ui := new(cli.MockUi)
	cmd := &AllocStopCommand{Meta: Meta{Ui: ui}}

	// require.Equal takes (expected, actual); keeping that order makes a
	// failure report label the two values correctly.

	// Fails on misuse
	require.Equal(1, cmd.Run([]string{"some", "garbage", "args"}), "Expected failure")
	require.Contains(ui.ErrorWriter.String(), commandErrorText(cmd), "Expected help output")
	ui.ErrorWriter.Reset()

	// Fails on connection failure
	require.Equal(1, cmd.Run([]string{"-address=nope", "foobar"}), "expected failure")
	require.Contains(ui.ErrorWriter.String(), "Error querying allocation")
	ui.ErrorWriter.Reset()

	// Fails on missing alloc
	require.Equal(1, cmd.Run([]string{"-address=" + url, "26470238-5CF2-438F-8772-DC67CFB0705C"}))
	require.Contains(ui.ErrorWriter.String(), "No allocation(s) with prefix or id")
	ui.ErrorWriter.Reset()

	// Fail on identifier with too few characters
	require.Equal(1, cmd.Run([]string{"-address=" + url, "2"}))
	require.Contains(ui.ErrorWriter.String(), "must contain at least two characters")
	ui.ErrorWriter.Reset()

	// Identifiers with uneven length should produce a query result
	require.Equal(1, cmd.Run([]string{"-address=" + url, "123"}))
	require.Contains(ui.ErrorWriter.String(), "No allocation(s) with prefix or id")
	ui.ErrorWriter.Reset()
}

// TestAllocStop_Run registers a job against a test server, waits for one of
// its allocations to be running, and checks that `alloc stop` on that
// allocation exits with code 0.
func TestAllocStop_Run(t *testing.T) {
	srv, client, url := testServer(t, true, nil)
	defer srv.Shutdown()

	require := require.New(t)

	// Wait for a node to be ready
	testutil.WaitForResult(func() (bool, error) {
		nodes, _, err := client.Nodes().List(nil)
		if err != nil {
			return false, err
		}
		for _, node := range nodes {
			if _, ok := node.Drivers["mock_driver"]; ok &&
				node.Status == structs.NodeStatusReady {
				return true, nil
			}
		}
		return false, fmt.Errorf("no ready nodes")
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	ui := new(cli.MockUi)
	cmd := &AllocStopCommand{Meta: Meta{Ui: ui}}

	jobID := "job1_sfx"
	job1 := testJob(jobID)
	resp, _, err := client.Jobs().Register(job1, nil)
	require.NoError(err)
	if code := waitForSuccess(ui, client, fullId, t, resp.EvalID); code != 0 {
		t.Fatalf("status code non zero saw %d", code)
	}

	// Get an alloc id. Surface a query error directly instead of letting it
	// show up later as a confusing "unable to find allocation" failure.
	allocs, _, err := client.Jobs().Allocations(jobID, false, nil)
	require.NoError(err)
	require.NotEmpty(allocs, "unable to find allocation")
	allocID := allocs[0].ID
	require.NotEmpty(allocID, "unable to find allocation")

	// Wait for alloc to be running
	testutil.WaitForResult(func() (bool, error) {
		alloc, _, err := client.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}
		if alloc.ClientStatus == api.AllocClientStatusRunning {
			return true, nil
		}
		return false, fmt.Errorf("alloc is not running, is: %s", alloc.ClientStatus)
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// require.Equal takes (expected, actual).
	require.Equal(0, cmd.Run([]string{"-address=" + url, allocID}), "expected successful exit code")

	ui.OutputWriter.Reset()
}
5 changes: 5 additions & 0 deletions command/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"alloc stop": func() (cli.Command, error) {
return &AllocStopCommand{
Meta: meta,
}, nil
},
"alloc fs": func() (cli.Command, error) {
return &AllocFSCommand{
Meta: meta,
Expand Down
Loading

0 comments on commit 35a2c90

Please sign in to comment.