Skip to content

Commit

Permalink
fix(k8s): tasks and tests would sometimes return empty logs
Browse files Browse the repository at this point in the history
This was due to kubectl sometimes failing to attach to Pods. We now
explicitly fetch the logs, with a bit of trickery to be able to do so
when copying artifacts out of the container after running the test/task
command.

This should also address the flaky integ tests we've been seeing in
minikube.
  • Loading branch information
edvald authored and thsig committed Feb 7, 2020
1 parent 78462b9 commit d0b025c
Show file tree
Hide file tree
Showing 10 changed files with 238 additions and 118 deletions.
6 changes: 4 additions & 2 deletions garden-service/src/plugins/kubernetes/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,10 +441,12 @@ export class KubeApi {
if (typeof output.then === "function") {
return (
output
// return the result body direcly
// return the result body directly if applicable
.then((res: any) => {
if (isPlainObject(res) && res["body"] !== undefined) {
if (isPlainObject(res) && res.hasOwnProperty("body")) {
return res["body"]
} else {
return res
}
})
// the API errors are not properly formed Error objects
Expand Down
97 changes: 63 additions & 34 deletions garden-service/src/plugins/kubernetes/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { Writable } from "stream"
import { ChildProcess } from "child_process"
import { sleep, uniqByName } from "../../util/util"
import { KubeApi } from "./api"
import { checkPodStatus } from "./status/pod"
import { checkPodStatus, getPodLogs } from "./status/pod"
import { KubernetesServerResource } from "./types"
import { ServiceState } from "../../types/service"
import { RunModuleParams } from "../../types/plugin/module/runModule"
Expand Down Expand Up @@ -109,8 +109,9 @@ export async function runAndCopy({
)
}

// We start the container and let it run while we execute the target command, and then copy the artifacts
spec.containers[0].command = ["sh", "-c", "sleep 86400"]
// We start the container with a named pipe and tail that, to get the logs from the actual command
// we plan on running. Then we sleep, so that we can copy files out of the container.
spec.containers[0].command = ["sh", "-c", "mkfifo /tmp/output && tail /tmp/output && sleep 86400"]
} else {
if (args) {
spec.containers[0].args = args
Expand Down Expand Up @@ -154,14 +155,7 @@ export async function runAndCopy({
// Specifically look for error indicating `sh` is missing, and report with helpful message.
const containerStatus = pod!.status.containerStatuses![0]

// FIXME: use optional chaining when TS 3.7 is out
if (
containerStatus &&
containerStatus.state &&
containerStatus.state.terminated &&
containerStatus.state.terminated.message &&
containerStatus.state.terminated.message.includes("not found")
) {
if (containerStatus?.state?.terminated?.message?.includes("not found")) {
throw new ConfigurationError(
deline`
${description} specifies artifacts to export, but the image doesn't
Expand All @@ -179,15 +173,25 @@ export async function runAndCopy({
try {
await runner.exec({
command: ["sh", "-c", "tar --help"],
container: spec.containers[0].name,
container: mainContainerName,
ignoreError: false,
log,
stdout,
stderr,
})
} catch (err) {
// TODO: fall back to copying `arc` into the container and using that
// (tar is not static so we can't copy that directly)
// TODO: fall back to copying `arc` (https://github.com/mholt/archiver) or similar into the container and
// using that (tar is not statically compiled so we can't copy that directly). Keeping this snippet around
// for that:
// await runner.exec({
// command: ["sh", "-c", `sed -n 'w ${arcPath}'; chmod +x ${arcPath}`],
// container: containerName,
// ignoreError: false,
// input: <binary>,
// log,
// stdout,
// stderr,
// })
throw new ConfigurationError(
deline`
${description} specifies artifacts to export, but the image doesn't
Expand All @@ -197,15 +201,24 @@ export async function runAndCopy({
)
}

// Escape the command, so that we can safely pass it as a single string
const cmd = [...command!, ...(args || [])].map((s) => JSON.stringify(s)).join(" ")

result = await runner.exec({
command: [...command!, ...(args || [])],
container: spec.containers[0].name,
// Pipe the output from the command to the /tmp/output pipe, including stderr. Some shell voodoo happening here,
// but this was the only working approach I could find after a lot of trial and error.
command: ["sh", "-c", `echo $(${cmd}) >>/tmp/output 2>&1`],
container: mainContainerName,
ignoreError: true,
log,
stdout,
stderr,
})

// Need to retrieve the logs explicitly, because kubectl exec sometimes fails to capture them
const containerLogs = await getPodLogs({ api, namespace, podName, containerNames: [mainContainerName] })
result.log = containerLogs[0].log

// Copy the artifacts
await Promise.all(
artifacts.map(async (artifact) => {
Expand Down Expand Up @@ -264,15 +277,30 @@ export async function runAndCopy({
)
} finally {
await tmpDir.cleanup()
await runner.stop({ log })
await runner.stop()
}
} else {
result = await runner.startAndWait({
interactive,
ignoreError: !!ignoreError,
log,
remove: false,
timeout,
})

try {
// Need to retrieve the logs explicitly, because kubectl run sometimes fails to capture them
const containerLogs = await getPodLogs({ api, namespace, podName, containerNames: [mainContainerName] })

// Keep the existing logs if no logs come back from the API. This may happen if the Pod failed to start
// altogether.
if (containerLogs[0].log) {
result.log = containerLogs[0].log
}
} finally {
// Make sure Pod is cleaned up
await runner.stop()
}
}

return result
Expand All @@ -289,12 +317,9 @@ class PodRunnerParams {
spec: V1PodSpec
}

interface StopParams {
log: LogEntry
}

interface StartParams {
ignoreError?: boolean
input?: Buffer | string
log: LogEntry
stdout?: Writable
stderr?: Writable
Expand All @@ -309,6 +334,7 @@ type ExecParams = StartParams & {

type StartAndWaitParams = StartParams & {
interactive: boolean
remove?: boolean
}

class PodRunnerError extends GardenBaseError {
Expand Down Expand Up @@ -341,6 +367,7 @@ export class PodRunner extends PodRunnerParams {
ignoreError,
interactive,
stdout: outputStream,
remove = true,
timeout,
}: StartAndWaitParams): Promise<RunResult> {
const { module, spec } = this
Expand All @@ -351,7 +378,11 @@ export class PodRunner extends PodRunnerParams {
spec.containers[0].tty = true
}

const kubecmd = [...this.getBaseRunArgs(), "--rm", interactive ? "--tty" : "--quiet"]
const kubecmd = [...this.getBaseRunArgs(), interactive ? "--tty" : "--quiet"]

if (remove) {
kubecmd.push("--rm")
}

const command = [...(spec.containers[0].command || []), ...(spec.containers[0].args || [])]
log.verbose(`Running '${command.join(" ")}' in Pod ${this.podName}`)
Expand Down Expand Up @@ -460,7 +491,7 @@ export class PodRunner extends PodRunnerParams {
* Executes a command in the running Pod. Must be called after `start()`.
*/
async exec(params: ExecParams) {
const { log, command, container, ignoreError, stdout, stderr, timeout } = params
const { log, command, container, ignoreError, input, stdout, stderr, timeout } = params

if (!this.proc) {
throw new PodRunnerError(`Attempting to exec a command in Pod before starting it`, { command })
Expand All @@ -475,6 +506,7 @@ export class PodRunner extends PodRunnerParams {
args,
namespace: this.namespace,
ignoreError,
input,
log,
provider: this.provider,
stdout,
Expand All @@ -497,21 +529,18 @@ export class PodRunner extends PodRunnerParams {
* Disconnects from a connected Pod (if any) and removes it from the cluster. You can safely call this even
* if the process is no longer active.
*/
async stop({ log }: StopParams) {
async stop() {
if (this.proc) {
delete this.proc
}

// TODO: use API
const args = ["delete", "pod", this.podName, "--ignore-not-found=true", "--wait=false"]

await kubectl.exec({
args,
ignoreError: true,
log,
namespace: this.namespace,
provider: this.provider,
})
try {
await this.api.core.deleteNamespacedPod(this.podName, this.namespace, undefined, undefined, 0)
} catch (err) {
if (err.code !== 404) {
throw err
}
}
}

private getBaseRunArgs() {
Expand Down
103 changes: 70 additions & 33 deletions garden-service/src/plugins/kubernetes/status/pod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,51 +57,88 @@ export function checkWorkloadPodStatus(
return { state: combineStates(pods.map(checkPodStatus)), resource }
}

/**
* Get a formatted list of log tails for each of the specified pods. Used for debugging and error logs.
*/
export async function getPodLogs(api: KubeApi, namespace: string, podNames: string[]): Promise<string> {
const allLogs = await Bluebird.map(podNames, async (name) => {
let containerName: string | undefined
export async function getPodLogs({
api,
namespace,
podName,
containerNames,
byteLimit,
lineLimit,
}: {
api: KubeApi
namespace: string
podName: string
containerNames?: string[]
byteLimit?: number
lineLimit?: number
}) {
let podRes: V1Pod

try {
const podRes = await api.core.readNamespacedPod(name, namespace)
const containerNames = podRes.spec.containers.map((c) => c.name)
if (containerNames.length > 1) {
containerName = containerNames.filter((n) => !n.match(/garden-/))[0] || containerNames[0]
} else {
containerName = containerNames[0]
}
} catch (err) {
if (err.code === 404) {
return ""
} else {
throw err
}
try {
podRes = await api.core.readNamespacedPod(podName, namespace)
} catch (err) {
if (err.code === 404) {
return []
} else {
throw err
}
}

let podContainers = podRes.spec!.containers.map((c) => c.name).filter((n) => !n.match(/garden-/))

if (containerNames) {
podContainers = podContainers.filter((name) => containerNames.includes(name))
}

return Bluebird.map(podContainers, async (containerName) => {
let log = ""

// Putting 5000 bytes as a length limit in addition to the line limit, just as a precaution in case someone
// accidentally logs a binary file or something.
try {
const log = await api.core.readNamespacedPodLog(
name,
log = await api.core.readNamespacedPodLog(
podName,
namespace,
containerName,
false,
5000,
undefined,
false,
undefined,
podLogLines
false, // follow
byteLimit,
undefined, // pretty
false, // previous
undefined, // sinceSeconds
lineLimit
)
return log ? chalk.blueBright(`\n****** ${name} ******\n`) + log : ""
} catch (err) {
if (err instanceof KubernetesError && err.message.includes("waiting to start")) {
return ""
log = ""
} else {
throw err
}
}

// the API returns undefined if no logs have been output, for some reason
return { containerName, log: log || "" }
})
}

/**
* Get a formatted list of log tails for each of the specified pods. Used for debugging and error logs.
*/
export async function getFormattedPodLogs(api: KubeApi, namespace: string, podNames: string[]): Promise<string> {
const allLogs = await Bluebird.map(podNames, async (podName) => {
return {
podName,
// Putting 5000 bytes as a length limit in addition to the line limit, just as a precaution in case someone
// accidentally logs a binary file or something.
containers: await getPodLogs({ api, namespace, podName, byteLimit: 5000, lineLimit: podLogLines }),
}
})
return allLogs.filter((l) => l !== "").join("\n\n")

return allLogs
.map(({ podName, containers }) => {
return (
chalk.blueBright(`\n****** ${podName} ******\n`) +
containers.map(({ containerName, log }) => {
return chalk.gray(`------ ${containerName} ------`) + (log || "<no logs>")
})
)
})
.join("\n\n")
}
4 changes: 2 additions & 2 deletions garden-service/src/plugins/kubernetes/status/workload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
} from "@kubernetes/client-node"
import dedent = require("dedent")
import { getCurrentWorkloadPods } from "../util"
import { getPodLogs, podLogLines } from "./pod"
import { getFormattedPodLogs, podLogLines } from "./pod"
import { ResourceStatus, StatusHandlerParams } from "./status"
import { getResourceEvents } from "./events"

Expand Down Expand Up @@ -83,7 +83,7 @@ export async function checkWorkloadStatus({ api, namespace, resource }: StatusHa

// Attach pod logs for debug output
const podNames = (await getPods()).map((pod) => pod.metadata.name)
const podLogs = (await getPodLogs(api, namespace, podNames)) || undefined
const podLogs = (await getFormattedPodLogs(api, namespace, podNames)) || undefined

if (podLogs) {
logs += chalk.white("\n\n━━━ Pod logs ━━━\n")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ description: Test module for attempting to copy artifacts without sh present in
type: container
tasks:
- name: missing-sh-task
command: [sh, -c, "touch /task-a.txt"]
command: [sh, -c, "touch /task-a.txt && echo ok"]
artifacts:
- source: /task-a.txt
tests:
- name: missing-sh-test
command: [sh, -c, "touch /test-a.txt"]
command: [sh, -c, "touch /test-a.txt && echo ok"]
artifacts:
- source: /test-a.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ description: Test module for attempting to copy artifacts without tar binary in
type: container
tasks:
- name: missing-tar-task
command: [sh, -c, "touch /task-a.txt"]
command: [sh, -c, "touch /task-a.txt && echo ok"]
artifacts:
- source: /task-a.txt
tests:
- name: missing-tar-test
command: [sh, -c, "touch /test-a.txt"]
command: [sh, -c, "touch /test-a.txt && echo ok"]
artifacts:
- source: /test-a.txt
Loading

0 comments on commit d0b025c

Please sign in to comment.