Skip to content

Commit

Permalink
#14 oom killer: cleaner logging and unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
towe75 committed Dec 24, 2019
1 parent 583c7d1 commit 94dc4af
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 3 deletions.
50 changes: 49 additions & 1 deletion driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func createBasicResources() *drivers.Resources {
res := drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
// MemoryMB: 256,
MemoryMB: 100,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 250,
Expand Down Expand Up @@ -630,6 +630,54 @@ func TestPodmanDriver_Init(t *testing.T) {

}

// TestPodmanDriver_OOM verifies OOM flag propagation: a task that keeps
// growing a shell variable under a small memory limit must finish
// unsuccessfully, with OOMKilled set and an error mentioning the OOM killer.
func TestPodmanDriver_OOM(t *testing.T) {
	if !tu.IsCI() {
		t.Parallel()
	}

	taskCfg := newTaskConfig("", []string{
		// Incrementally creates a bigger and bigger variable.
		"sh",
		"-c",
		"x=a; while true; do eval x='$x$x'; done",
	})
	// enable --init
	taskCfg.Init = true

	task := &drivers.TaskConfig{
		ID:        uuid.Generate(),
		Name:      "oom",
		AllocID:   uuid.Generate(),
		Resources: createBasicResources(),
	}
	// limit memory to 10MB to trigger oom soon enough
	task.Resources.NomadResources.Memory.MemoryMB = 10
	require.NoError(t, task.EncodeConcreteDriverConfig(&taskCfg))

	d := podmanDriverHarness(t, nil)
	cleanup := d.MkAllocDir(task, true)
	defer cleanup()

	_, _, err := d.StartTask(task)
	require.NoError(t, err)

	defer d.DestroyTask(task.ID, true)

	// Attempt to wait
	waitCh, err := d.WaitTask(context.Background(), task.ID)
	require.NoError(t, err)

	select {
	case res := <-waitCh:
		require.False(t, res.Successful(), "Should have failed because of oom but was successful")
		require.True(t, res.OOMKilled, "OOM Flag not set")
		// Check Err before dereferencing it: a missing error should surface
		// as a readable assertion failure, not as a nil-pointer panic.
		require.Error(t, res.Err, "OOM kill should be reported as an error")
		require.Contains(t, res.Err.Error(), "OOM killer")
	case <-time.After(time.Duration(tu.TestMultiplier()*1) * time.Second):
		t.Fatalf("Container did not exit in time")
	}
}

// read a task's logfile into a string, fail on error
func readLogfile(t *testing.T, task *drivers.TaskConfig) string {
logfile := filepath.Join(filepath.Dir(task.StdoutPath), fmt.Sprintf("%s.stdout.0", task.Name))
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 h1:f0n1xnMSmBLzVf
github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75/go.mod h1:g2644b03hfBX9Ov0ZBDgXXens4rxSxmqFBbhvKv2yVA=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
Expand Down Expand Up @@ -575,6 +576,7 @@ github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFB
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.0.0/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
Expand Down
9 changes: 7 additions & 2 deletions handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,18 @@ func (h *TaskHandle) MonitorContainer() {
h.stateLock.Lock()
h.procState = drivers.TaskStateExited
if err != nil {
h.logger.Error("Failt to inspect stopped container, can not get exit code", "err", err)
h.exitResult.Err = fmt.Errorf("Driver was unable to get the exit code. %s: %v", h.containerID, err)
h.logger.Error("Failt to inspect stopped container, can not get exit code", "container", h.containerID, "err", err)
h.exitResult.Signal = 0
h.completedAt = time.Now()
} else {
h.exitResult.ExitCode = int(inspectData.State.ExitCode)
h.exitResult.OOMKilled = inspectData.State.OOMKilled
h.completedAt = inspectData.State.FinishedAt
if inspectData.State.OOMKilled {
h.exitResult.OOMKilled = true
h.exitResult.Err = fmt.Errorf("Podman container killed by OOM killer")
h.logger.Error("Podman container killed by OOM killer", "container", h.containerID)
}
}
if h.exitChannel != nil {
h.exitChannel <- h.exitResult
Expand Down

0 comments on commit 94dc4af

Please sign in to comment.