Skip to content
This repository has been archived by the owner on Mar 16, 2024. It is now read-only.

Commit

Permalink
Harden logic around job output saving
Browse files Browse the repository at this point in the history
Do not attempt to get job output if the helper sidecar is not known to
be running. This will reduce many spurious errors when trying to get
output of a job when it's not ready or running.

Also don't ignore errors for job shutdown.

Signed-off-by: Darren Shepherd <[email protected]>
  • Loading branch information
ibuildthecloud committed Jun 17, 2023
1 parent 55e81bb commit 7a4027e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
12 changes: 8 additions & 4 deletions pkg/controller/jobs/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,18 @@ func (h *Handler) SaveJobOutput(req router.Request, resp router.Response) error
names.Insert(name)
}

helperRunning := false
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
if status.Name == jobs.Helper && status.State.Running != nil {
helperRunning = true
}
// TODO: check that helper is really running, otherwise early exit
if status.State.Terminated != nil {
names.Delete(status.Name)
}
}

if names.Len() > 0 {
if names.Len() > 0 || !helperRunning {
return nil
}

Expand Down Expand Up @@ -170,7 +175,6 @@ func (h *Handler) SaveJobOutput(req router.Request, resp router.Response) error
return err
}

// ignore error, it always exits with exit code 137
_, _ = h.runCommand(req.Ctx, pod, "/usr/local/bin/acorn-job-helper-shutdown")
return nil
_, err = h.runCommand(req.Ctx, pod, "/usr/local/bin/acorn-job-helper-shutdown")
return err
}
1 change: 1 addition & 0 deletions scripts/acorn-job-helper-init
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/sh
mkfifo /tmp/.fifo
echo > /tmp/.fifo
sleep .2

0 comments on commit 7a4027e

Please sign in to comment.