Merge remote-tracking branch 'origin/master' into f-ui/alloc-fs

* origin/master: (32 commits) Added additional test cases and fixed go test case update changelog Add Mirage-toggling via environment variable (#5899) changelog: Add entries for windows fixes fifo: Safer access to Conn run post-run/post-stop task runner hooks Fail alloc if alloc runner prestart hooks fail address review comments changelog Missed one revert of backwards compatibility for node drain Improve test cases for detecting content type Undo removal of node drain compat changes Updated with suggestions. fifo: Close connections and cleanup lock handling logmon: Add windows compatibility test client: defensive against getting stale alloc updates Infer content type in alloc fs stat endpoint appveyor: Run logmon tests fifo: Require that fifos do not exist for create vendor: Use dani fork of go-winio ...
hashicorp · Jul 2, 2019 · 44fc18e · 44fc18e
2 parents c2e782b + b57ea22
commit 44fc18e
Show file tree

Hide file tree

Showing 54 changed files with 1,391 additions and 732 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,9 @@
 ## 0.9.4 (Unreleased)
 
 IMPROVEMENTS:
-
+ * core: removed deprecated upgrade path code pertaining to older versions of Nomad [[GH-5894](https://github.com/hashicorp/nomad/issues/5894)]
  * api: use region from job hcl when not provided as query parameter in job registration and plan endpoints [[GH-5664](https://github.com/hashicorp/nomad/pull/5664)]
+ * api: infer content type of file in alloc filesystem stat endpoint [[GH-5907](https://github.com/hashicorp/nomad/issues/5907)]
  * metrics: add namespace label as appropriate to metrics [[GH-5847](https://github.com/hashicorp/nomad/issues/5847)]
  * ui: Moved client status, draining, and eligibility fields into single state column [[GH-5789](https://github.com/hashicorp/nomad/pull/5789)]
 
@@ -11,7 +12,9 @@ BUG FIXES:
  * core: Improved job spec parsing error messages for variable interpolation failures [[GH-5844](https://github.com/hashicorp/nomad/issues/5844)]
  * core: Handle error case when attempting to stop a non-existent allocation [[GH-5865](https://github.com/hashicorp/nomad/issues/5865)]
  * client: Fixed regression that prevented registering multiple services with the same name but different ports in Consul correctly [[GH-5829](https://github.com/hashicorp/nomad/issues/5829)]
+ * client: Fixed a race condition when performing local task restarts that would result in incorrect task not found errors on Windows [[GH-5899](https://github.com/hashicorp/nomad/pull/5889)]
  * driver: Fixed an issue preventing external driver plugins from launching executor process [[GH-5726](https://github.com/hashicorp/nomad/issues/5726)]
+ * driver: Fixed an issue preventing local task restarts on Windows [[GH-5864](https://github.com/hashicorp/nomad/pull/5864)]
  * driver/docker: Fixed a bug mounting relative paths on Windows [[GH-5811](https://github.com/hashicorp/nomad/issues/5811)]
  * driver/exec: Upgraded libcontainer dependency to avoid zombie `runc:[1:CHILD]]` processes [[GH-5851](https://github.com/hashicorp/nomad/issues/5851)]
  * metrics: Upgrade prometheus client to avoid label conflicts [[GH-5850](https://github.com/hashicorp/nomad/issues/5850)]

diff --git a/api/fs.go b/api/fs.go
@@ -20,11 +20,12 @@ const (
 
 // AllocFileInfo holds information about a file inside the AllocDir
 type AllocFileInfo struct {
-	Name     string
-	IsDir    bool
-	Size     int64
-	FileMode string
-	ModTime  time.Time
+	Name        string
+	IsDir       bool
+	Size        int64
+	FileMode    string
+	ModTime     time.Time
+	ContentType string
 }
 
 // StreamFrame is used to frame data of a file when streaming

diff --git a/appveyor.yml b/appveyor.yml
@@ -46,6 +46,7 @@ test_script:
       gotestsum --junitfile results.xml
            github.com/hashicorp/nomad/drivers/docker
            github.com/hashicorp/nomad/client/lib/fifo
+           github.com/hashicorp/nomad/client/logmon
 # on_finish:
 #   - ps: |
 #       Push-AppveyorArtifact (Resolve-Path .\results.xml)

diff --git a/client/allocdir/alloc_dir.go b/client/allocdir/alloc_dir.go
@@ -11,6 +11,9 @@ import (
 	"sync"
 	"time"
 
+	"net/http"
+	"strings"
+
 	hclog "github.com/hashicorp/go-hclog"
 	multierror "github.com/hashicorp/go-multierror"
 	cstructs "github.com/hashicorp/nomad/client/structs"
@@ -392,15 +395,41 @@ func (d *AllocDir) Stat(path string) (*cstructs.AllocFileInfo, error) {
 		return nil, err
 	}
 
+	contentType := detectContentType(info, p)
+
 	return &cstructs.AllocFileInfo{
-		Size:     info.Size(),
-		Name:     info.Name(),
-		IsDir:    info.IsDir(),
-		FileMode: info.Mode().String(),
-		ModTime:  info.ModTime(),
+		Size:        info.Size(),
+		Name:        info.Name(),
+		IsDir:       info.IsDir(),
+		FileMode:    info.Mode().String(),
+		ModTime:     info.ModTime(),
+		ContentType: contentType,
 	}, nil
 }
 
+// detectContentType tries to infer the file type by reading the first
+// 512 bytes of the file. Json file extensions are special cased.
+func detectContentType(fileInfo os.FileInfo, path string) string {
+	contentType := "application/octet-stream"
+	if !fileInfo.IsDir() {
+		f, err := os.Open(path)
+		// Best effort content type detection
+		// We ignore errors because this is optional information
+		if err == nil {
+			fileBytes := make([]byte, 512)
+			_, err := f.Read(fileBytes)
+			if err == nil {
+				contentType = http.DetectContentType(fileBytes)
+			}
+		}
+	}
+	// Special case json files
+	if strings.HasSuffix(path, ".json") {
+		contentType = "application/json"
+	}
+	return contentType
+}
+
 // ReadAt returns a reader for a file at the path relative to the alloc dir
 func (d *AllocDir) ReadAt(path string, offset int64) (io.ReadCloser, error) {
 	if escapes, err := structs.PathEscapesAllocDir("", path); err != nil {

diff --git a/client/allocdir/alloc_dir_test.go b/client/allocdir/alloc_dir_test.go
@@ -472,3 +472,32 @@ func TestPathFuncs(t *testing.T) {
 		t.Errorf("%q is not empty. empty=%v error=%v", dir, empty, err)
 	}
 }
+
+func TestAllocDir_DetectContentType(t *testing.T) {
+	require := require.New(t)
+	inputPath := "input/"
+	var testFiles []string
+	err := filepath.Walk(inputPath, func(path string, info os.FileInfo, err error) error {
+		if !info.IsDir() {
+			testFiles = append(testFiles, path)
+		}
+		return err
+	})
+	require.Nil(err)
+
+	expectedEncodings := map[string]string{
+		"input/happy.gif": "image/gif",
+		"input/image.png": "image/png",
+		"input/nomad.jpg": "image/jpeg",
+		"input/test.bin":  "application/octet-stream",
+		"input/test.json": "application/json",
+		"input/test.txt":  "text/plain; charset=utf-8",
+		"input/test.go":   "text/plain; charset=utf-8",
+	}
+	for _, file := range testFiles {
+		fileInfo, err := os.Stat(file)
+		require.Nil(err)
+		res := detectContentType(fileInfo, file)
+		require.Equal(expectedEncodings[file], res, "unexpected output for %v", file)
+	}
+}
diff --git a/client/allocdir/input/happy.gif b/client/allocdir/input/happy.gif
diff --git a/client/allocdir/input/image.png b/client/allocdir/input/image.png
diff --git a/client/allocdir/input/nomad.jpg b/client/allocdir/input/nomad.jpg
diff --git a/client/allocdir/input/test.bin b/client/allocdir/input/test.bin
diff --git a/client/allocdir/input/test.go b/client/allocdir/input/test.go
@@ -0,0 +1,26 @@
+package allocdir
+
+import (
+	"os"
+	"syscall"
+)
+
+// linkDir hardlinks src to dst. The src and dst must be on the same filesystem.
+func linkDir(src, dst string) error {
+	return syscall.Link(src, dst)
+}
+
+// unlinkDir removes a directory link.
+func unlinkDir(dir string) error {
+	return syscall.Unlink(dir)
+}
+
+// createSecretDir creates the secrets dir folder at the given path
+func createSecretDir(dir string) error {
+	return os.MkdirAll(dir, 0777)
+}
+
+// removeSecretDir removes the secrets dir folder
+func removeSecretDir(dir string) error {
+	return os.RemoveAll(dir)
+}
diff --git a/client/allocdir/input/test.json b/client/allocdir/input/test.json
@@ -0,0 +1,3 @@
+{
+"test":"test"
+}
diff --git a/client/allocdir/input/test.txt b/client/allocdir/input/test.txt
@@ -0,0 +1 @@
+hello world
diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
@@ -248,10 +248,18 @@ func (ar *allocRunner) Run() {
 	default:
 	}
 
+	// When handling (potentially restored) terminal alloc, ensure tasks and post-run hooks are run
+	// to perform any cleanup that's necessary, potentially not done prior to earlier termination
+
 	// Run the prestart hooks if non-terminal
 	if ar.shouldRun() {
 		if err := ar.prerun(); err != nil {
 			ar.logger.Error("prerun failed", "error", err)
+
+			for _, tr := range ar.tasks {
+				tr.MarkFailedDead(fmt.Sprintf("failed to setup runner: %v", err))
+			}
+
 			goto POST
 		}
 	}

diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go
@@ -131,3 +131,163 @@ func TestAllocRunner_Restore_RunningTerminal(t *testing.T) {
 	require.Equal(t, events[2].Type, structs.TaskStarted)
 	require.Equal(t, events[3].Type, structs.TaskTerminated)
 }
+
+// TestAllocRunner_Restore_CompletedBatch asserts that restoring a completed
+// batch alloc doesn't run it again
+func TestAllocRunner_Restore_CompletedBatch(t *testing.T) {
+	t.Parallel()
+
+	// 1. Run task and wait for it to complete
+	// 2. Start new alloc runner
+	// 3. Assert task didn't run again
+
+	alloc := mock.Alloc()
+	alloc.Job.Type = structs.JobTypeBatch
+	task := alloc.Job.TaskGroups[0].Tasks[0]
+	task.Driver = "mock_driver"
+	task.Config = map[string]interface{}{
+		"run_for": "2ms",
+	}
+
+	conf, cleanup := testAllocRunnerConfig(t, alloc.Copy())
+	defer cleanup()
+
+	// Maintain state for subsequent run
+	conf.StateDB = state.NewMemDB(conf.Logger)
+
+	// Start and wait for task to be running
+	ar, err := NewAllocRunner(conf)
+	require.NoError(t, err)
+	go ar.Run()
+	defer destroy(ar)
+
+	testutil.WaitForResult(func() (bool, error) {
+		s := ar.AllocState()
+		if s.ClientStatus != structs.AllocClientStatusComplete {
+			return false, fmt.Errorf("expected complete, got %s", s.ClientStatus)
+		}
+		return true, nil
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+
+	// once job finishes, it shouldn't run again
+	require.False(t, ar.shouldRun())
+	initialRunEvents := ar.AllocState().TaskStates[task.Name].Events
+	require.Len(t, initialRunEvents, 4)
+
+	ls, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
+	require.NoError(t, err)
+	require.NotNil(t, ls)
+	require.Equal(t, structs.TaskStateDead, ts.State)
+
+	// Start a new alloc runner and assert it gets stopped
+	conf2, cleanup2 := testAllocRunnerConfig(t, alloc)
+	defer cleanup2()
+
+	// Use original statedb to maintain hook state
+	conf2.StateDB = conf.StateDB
+
+	// Restore, start, and wait for task to be killed
+	ar2, err := NewAllocRunner(conf2)
+	require.NoError(t, err)
+
+	require.NoError(t, ar2.Restore())
+
+	go ar2.Run()
+	defer destroy(ar2)
+
+	// AR waitCh must be closed even when task doesn't run again
+	select {
+	case <-ar2.WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "alloc.waitCh wasn't closed")
+	}
+
+	// TR waitCh must be closed too!
+	select {
+	case <-ar2.tasks[task.Name].WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "tr.waitCh wasn't closed")
+	}
+
+	// Assert that events are unmodified, which they would if task re-run
+	events := ar2.AllocState().TaskStates[task.Name].Events
+	require.Equal(t, initialRunEvents, events)
+}
+
+// TestAllocRunner_PreStartFailuresLeadToFailed asserts that if an alloc
+// prestart hooks failed, then the alloc and subsequent tasks transition
+// to failed state
+func TestAllocRunner_PreStartFailuresLeadToFailed(t *testing.T) {
+	t.Parallel()
+
+	alloc := mock.Alloc()
+	alloc.Job.Type = structs.JobTypeBatch
+	task := alloc.Job.TaskGroups[0].Tasks[0]
+	task.Driver = "mock_driver"
+	task.Config = map[string]interface{}{
+		"run_for": "2ms",
+	}
+	alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{
+		Attempts: 0,
+	}
+
+	conf, cleanup := testAllocRunnerConfig(t, alloc.Copy())
+	defer cleanup()
+
+	// Maintain state for subsequent run
+	conf.StateDB = state.NewMemDB(conf.Logger)
+
+	// Start and wait for task to be running
+	ar, err := NewAllocRunner(conf)
+	require.NoError(t, err)
+
+	ar.runnerHooks = append(ar.runnerHooks, &allocFailingPrestartHook{})
+
+	go ar.Run()
+	defer destroy(ar)
+
+	select {
+	case <-ar.WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "alloc.waitCh wasn't closed")
+	}
+
+	testutil.WaitForResult(func() (bool, error) {
+		s := ar.AllocState()
+		if s.ClientStatus != structs.AllocClientStatusFailed {
+			return false, fmt.Errorf("expected complete, got %s", s.ClientStatus)
+		}
+		return true, nil
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+
+	// once job finishes, it shouldn't run again
+	require.False(t, ar.shouldRun())
+	initialRunEvents := ar.AllocState().TaskStates[task.Name].Events
+	require.Len(t, initialRunEvents, 2)
+
+	ls, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
+	require.NoError(t, err)
+	require.NotNil(t, ls)
+	require.NotNil(t, ts)
+	require.Equal(t, structs.TaskStateDead, ts.State)
+	require.True(t, ts.Failed)
+
+	// TR waitCh must be closed too!
+	select {
+	case <-ar.tasks[task.Name].WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "tr.waitCh wasn't closed")
+	}
+}
+
+type allocFailingPrestartHook struct{}
+
+func (*allocFailingPrestartHook) Name() string { return "failing_prestart" }
+
+func (*allocFailingPrestartHook) Prerun() error {
+	return fmt.Errorf("failing prestart hooks")
+}