Skip to content

Commit

Permalink
Destroy partially migrated alloc dirs
Browse files Browse the repository at this point in the history
Test that snapshot errors don't return a valid tar currently fails.
  • Loading branch information
schmichael committed Nov 16, 2017
1 parent 3d437fa commit 0f44008
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 1 deletion.
10 changes: 9 additions & 1 deletion client/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ func (r *AllocRunner) Run() {
r.allocDirLock.Unlock()

if err != nil {
r.logger.Printf("[ERR] client: failed to build task directories: %v", err)
r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err)
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
return
}
Expand All @@ -841,6 +841,14 @@ func (r *AllocRunner) Run() {

// Soft-fail on migration errors
r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err)

// Recreate alloc dir to ensure a clean slate
r.allocDir.Destroy()
if err := r.allocDir.Build(); err != nil {
r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err)
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup))
return
}
}

// Check if the allocation is in a terminal status. In this case, we don't
Expand Down
1 change: 1 addition & 0 deletions client/allocdir/alloc_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ func (d *AllocDir) Destroy() error {
mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to remove alloc dir %q: %v", d.AllocDir, err))
}

d.built = false
return mErr.ErrorOrNil()
}

Expand Down
85 changes: 85 additions & 0 deletions command/agent/alloc_endpoint_test.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
package agent

import (
"archive/tar"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"reflect"
"strings"
"testing"

"github.com/golang/snappy"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -352,6 +358,85 @@ func TestHTTP_AllocSnapshot_WithMigrateToken(t *testing.T) {
})
}

// TestHTTP_AllocSnapshot_Atomic ensures that when a client encounters an error
// snapshotting a valid tar is not returned.
func TestHTTP_AllocSnapshot_Atomic(t *testing.T) {
t.Parallel()
httpTest(t, nil, func(s *TestAgent) {
// Create an alloc
state := s.server.State()
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
alloc.Job.TaskGroups[0].Tasks[0].Config["run_for"] = "30s"
alloc.NodeID = s.client.NodeID()
state.UpsertJobSummary(998, mock.JobSummary(alloc.JobID))
if err := state.UpsertAllocs(1000, []*structs.Allocation{alloc.Copy()}); err != nil {
t.Fatalf("error upserting alloc: %v", err)
}

// Wait for the client to run it
testutil.WaitForResult(func() (bool, error) {
if _, err := s.client.GetClientAlloc(alloc.ID); err != nil {
return false, err
}

serverAlloc, err := state.AllocByID(nil, alloc.ID)
if err != nil {
return false, err
}

return serverAlloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf(serverAlloc.ClientStatus)
}, func(err error) {
t.Fatalf("client not running alloc: %v", err)
})

// Now write to its shared dir
allocDirI, err := s.client.GetAllocFS(alloc.ID)
if err != nil {
t.Fatalf("unable to find alloc dir: %v", err)
}
allocDir := allocDirI.(*allocdir.AllocDir)

// Remove the task dir to break Snapshot
os.RemoveAll(allocDir.TaskDirs["web"].LocalDir)

// Assert Snapshot fails
if err := allocDir.Snapshot(ioutil.Discard); err == nil {
t.Errorf("expected Snapshot() to fail but it did not")
} else {
s.logger.Printf("[DEBUG] agent.test: snapshot returned error: %v", err)
}

// Make the HTTP request to ensure the Snapshot error is
// propagated through to the HTTP layer and that a valid tar
// just isn't emitted.
respW := httptest.NewRecorder()
req, err := http.NewRequest("GET", fmt.Sprintf("/v1/client/allocation/%s/snapshot", alloc.ID), nil)
if err != nil {
t.Fatalf("err: %v", err)
}

// Make the request via the mux to make sure the error returned
// by Snapshot is properly propogated via HTTP
s.Server.mux.ServeHTTP(respW, req)
resp := respW.Result()
t.Logf("HTTP Response Status Code: %d", resp.StatusCode)
r := tar.NewReader(resp.Body)
for {
header, err := r.Next()
if err != nil {
if err == io.EOF {
t.Fatalf("Looks like a valid tar file to me?")
}
t.Logf("Yay! An error: %v", err)
return
}

t.Logf("Valid file returned: %s", header.Name)
}
})
}

func TestHTTP_AllocGC(t *testing.T) {
t.Parallel()
httpTest(t, nil, func(s *TestAgent) {
Expand Down

0 comments on commit 0f44008

Please sign in to comment.