diff --git a/CHANGELOG.md b/CHANGELOG.md index a71978ec71f..199ebe5cc43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ IMPROVEMENTS: * client: Create new process group on process startup. [[GH-3572](https://github.com/hashicorp/nomad/issues/3572)] + * driver/rkt: Enable stats collection for rkt tasks [[GH-4188](https://github.com/hashicorp/nomad/pull/4188)] BUG FIXES: * driver/exec: Create process group for Windows process and send Ctrl-Break signal on Shutdown [[GH-2117](https://github.com/hashicorp/nomad/issues/2117)] diff --git a/GNUmakefile b/GNUmakefile index e221a89da08..048951b0586 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -243,8 +243,7 @@ test: ## Run the Nomad test suite and/or the Nomad UI test suite .PHONY: test-nomad test-nomad: dev ## Run Nomad test suites @echo "==> Running Nomad test suites:" - @NOMAD_TEST_RKT=1 \ - go test $(if $(VERBOSE),-v) \ + @go test $(if $(VERBOSE),-v) \ -cover \ -timeout=900s \ -tags="$(if $(HAS_LXC),lxc)" ./... $(if $(VERBOSE), >test.log ; echo $$? > exit-code) diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index d854c56a097..3fab5c8cf5c 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -331,7 +331,7 @@ func TestExecDriver_HandlerExec(t *testing.T) { handle := resp.Handle // Exec a command that should work and dump the environment - out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep NOMAD"}) + out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep ^NOMAD"}) if err != nil { t.Fatalf("error exec'ing stat: %v", err) } diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 0a990dc839d..1ca82ebb3d5 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -194,7 +194,10 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log return nil, lastErr } - // This is a successful landing. + // This is a successful landing; log if its not the first attempt. + if try > 1 { + logger.Printf("[DEBUG] driver.rkt: retrieved network info for pod UUID %s on attempt %d", uuid, try) + } return &cstructs.DriverNetwork{ PortMap: portmap, IP: status.Networks[0].IP.String(), @@ -211,7 +214,7 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log } waitTime := getJitteredNetworkRetryTime() - logger.Printf("[DEBUG] driver.rkt: getting network info for pod UUID %s failed attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime) + logger.Printf("[DEBUG] driver.rkt: failed getting network info for pod UUID %s attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime) time.Sleep(waitTime) } return nil, fmt.Errorf("timed out, last error: %v", lastErr) @@ -665,9 +668,13 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, return nil, fmt.Errorf("failed to set executor context: %v", err) } + // Enable ResourceLimits to place the executor in a parent cgroup of + // the rkt container. This allows stats collection via the executor to + // work just like it does for exec. execCmd := &executor.ExecCommand{ - Cmd: absPath, - Args: runArgs, + Cmd: absPath, + Args: runArgs, + ResourceLimits: true, } ps, err := execIntf.LaunchCmd(execCmd) if err != nil { @@ -818,7 +825,7 @@ func (h *rktHandle) Kill() error { } func (h *rktHandle) Stats() (*cstructs.TaskResourceUsage, error) { - return nil, DriverStatsNotImplemented + return h.executor.Stats() } func (h *rktHandle) run() { diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go index 914c704ed7a..3aae9ff57c4 100644 --- a/client/driver/rkt_test.go +++ b/client/driver/rkt_test.go @@ -22,14 +22,12 @@ import ( "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/assert" - ctestutils "github.com/hashicorp/nomad/client/testutil" + ctestutil "github.com/hashicorp/nomad/client/testutil" ) func TestRktVersionRegex(t *testing.T) { + ctestutil.RktCompatible(t) t.Parallel() - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("NOMAD_TEST_RKT unset, skipping") - } inputRkt := "rkt version 0.8.1" inputAppc := "appc version 1.2.0" @@ -47,12 +45,9 @@ func TestRktVersionRegex(t *testing.T) { // The fingerprinter test should always pass, even if rkt is not installed. func TestRktDriver_Fingerprint(t *testing.T) { + ctestutil.RktCompatible(t) t.Parallel() - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) ctx := testDriverContexts(t, &structs.Task{Name: "foo", Driver: "rkt"}) d := NewRktDriver(ctx.DriverCtx) node := &structs.Node{ @@ -86,15 +81,11 @@ func TestRktDriver_Fingerprint(t *testing.T) { } func TestRktDriver_Start_DNS(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) - // TODO: use test server to load from a fixture task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -140,14 +131,11 @@ func TestRktDriver_Start_DNS(t *testing.T) { } func TestRktDriver_Start_Wait(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -214,14 +202,11 @@ func TestRktDriver_Start_Wait(t *testing.T) { } func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -270,14 +255,10 @@ func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) { } func TestRktDriver_Start_Wait_AllocDir(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - - ctestutils.RktCompatible(t) exp := []byte{'w', 'i', 'n'} file := "output.txt" @@ -347,13 +328,10 @@ func TestRktDriver_Start_Wait_AllocDir(t *testing.T) { // TestRktDriver_UserGroup asserts tasks may override the user and group of the // rkt image. func TestRktDriver_UserGroup(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) require := assert.New(t) task := &structs.Task{ @@ -408,13 +386,11 @@ func TestRktDriver_UserGroup(t *testing.T) { } func TestRktTrustPrefix(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) + task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -452,8 +428,9 @@ func TestRktTrustPrefix(t *testing.T) { } func TestRktTaskValidate(t *testing.T) { + ctestutil.RktCompatible(t) t.Parallel() - ctestutils.RktCompatible(t) + task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -476,16 +453,12 @@ func TestRktTaskValidate(t *testing.T) { } } -// TODO: Port Mapping test should be ran with proper ACI image and test the port access. -func TestRktDriver_PortsMapping(t *testing.T) { +func TestRktDriver_PortMapping(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -525,6 +498,7 @@ func TestRktDriver_PortsMapping(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } + defer resp.Handle.Kill() if resp.Network == nil { t.Fatalf("Expected driver to set a DriverNetwork, but it did not!") } @@ -549,14 +523,11 @@ func TestRktDriver_PortsMapping(t *testing.T) { // TestRktDriver_PortsMapping_Host asserts that port_map isn't required when // host networking is used. func TestRktDriver_PortsMapping_Host(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -591,6 +562,7 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } + defer resp.Handle.Kill() if resp.Network != nil { t.Fatalf("No network should be returned with --net=host but found: %#v", resp.Network) } @@ -613,14 +585,11 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) { } func TestRktDriver_HandlerExec(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") - } - ctestutils.RktCompatible(t) task := &structs.Task{ Name: "etcd", Driver: "rkt", @@ -650,24 +619,28 @@ func TestRktDriver_HandlerExec(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - - // Give the pod a second to start - time.Sleep(time.Second) + defer resp.Handle.Kill() // Exec a command that should work - out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"}) - if err != nil { - t.Fatalf("error exec'ing etcd --version: %v", err) - } - if code != 0 { - t.Fatalf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out)) - } - if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) { - t.Fatalf("expected output to start with %q but found:\n%q", expected, out) - } + testutil.WaitForResult(func() (bool, error) { + out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"}) + if err != nil { + return false, fmt.Errorf("error exec'ing etcd --version: %v", err) + } + if code != 0 { + return false, fmt.Errorf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out)) + } + if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) { + return false, fmt.Errorf("expected output to start with %q but found:\n%q", expected, out) + } + + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) // Exec a command that should fail - out, code, err = resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"}) + out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"}) if err != nil { t.Fatalf("error exec'ing bad command: %v", err) } @@ -683,15 +656,69 @@ func TestRktDriver_HandlerExec(t *testing.T) { } } -func TestRktDriver_Remove_Error(t *testing.T) { +func TestRktDriver_Stats(t *testing.T) { + ctestutil.RktCompatible(t) if !testutil.IsTravis() { t.Parallel() } - if os.Getenv("NOMAD_TEST_RKT") == "" { - t.Skip("skipping rkt tests") + + task := &structs.Task{ + Name: "etcd", + Driver: "rkt", + Config: map[string]interface{}{ + "trust_prefix": "coreos.com/etcd", + "image": "coreos.com/etcd:v2.0.4", + "command": "/etcd", + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: &structs.Resources{ + MemoryMB: 128, + CPU: 100, + }, } - ctestutils.RktCompatible(t) + ctx := testDriverContexts(t, task) + defer ctx.AllocDir.Destroy() + d := NewRktDriver(ctx.DriverCtx) + + if _, err := d.Prestart(ctx.ExecCtx, task); err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("err: %v", err) + } + defer resp.Handle.Kill() + + testutil.WaitForResult(func() (bool, error) { + stats, err := resp.Handle.Stats() + if err != nil { + return false, err + } + if stats == nil || stats.ResourceUsage == nil { + return false, fmt.Errorf("stats is nil") + } + if stats.ResourceUsage.CpuStats.TotalTicks == 0 { + return false, fmt.Errorf("cpu ticks unset") + } + if stats.ResourceUsage.MemoryStats.RSS == 0 { + return false, fmt.Errorf("rss stats unset") + } + return true, nil + }, func(err error) { + t.Fatalf("error: %v", err) + }) + +} + +func TestRktDriver_Remove_Error(t *testing.T) { + ctestutil.RktCompatible(t) + if !testutil.IsTravis() { + t.Parallel() + } // Removing a nonexistent pod should return an error if err := rktRemove("00000000-0000-0000-0000-000000000000"); err == nil { diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index 97f994c444d..7d928459a53 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -3,6 +3,7 @@ package testutil import ( "os/exec" "runtime" + "sync" "syscall" "testing" ) @@ -38,13 +39,23 @@ func QemuCompatible(t *testing.T) { } } +var rktExists bool +var rktOnce sync.Once + func RktCompatible(t *testing.T) { - if runtime.GOOS == "windows" || syscall.Geteuid() != 0 { - t.Skip("Must be root on non-windows environments to run test") + if runtime.GOOS != "linux" || syscall.Geteuid() != 0 { + t.Skip("Must be root on Linux to run test") } + // else see if rkt exists - _, err := exec.Command("rkt", "version").CombinedOutput() - if err != nil { + rktOnce.Do(func() { + _, err := exec.Command("rkt", "version").CombinedOutput() + if err == nil { + rktExists = true + } + }) + + if !rktExists { t.Skip("Must have rkt installed for rkt specific tests to run") } } diff --git a/e2e/rescheduling/server_side_restarts_test.go b/e2e/rescheduling/server_side_restarts_test.go index fb260b0b864..9f0f402df4a 100644 --- a/e2e/rescheduling/server_side_restarts_test.go +++ b/e2e/rescheduling/server_side_restarts_test.go @@ -6,7 +6,6 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/jobspec" - _ "github.com/hashicorp/nomad/jobspec" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega"