fix(k8s): tasks and tests would sometimes return empty logs

This was due to kubectl sometimes failing to attach to Pods. We now explicitly fetch the logs, with a bit of trickery to be able to do so when copying artifacts out of the container after running the test/task command. This should also address the flaky integ tests we've been seeing in minikube.
garden-io · Feb 7, 2020 · d0b025c · d0b025c
1 parent 78462b9
commit d0b025c
Show file tree

Hide file tree

Showing 10 changed files with 238 additions and 118 deletions.
diff --git a/garden-service/src/plugins/kubernetes/api.ts b/garden-service/src/plugins/kubernetes/api.ts
@@ -441,10 +441,12 @@ export class KubeApi {
           if (typeof output.then === "function") {
             return (
               output
-                // return the result body direcly
+                // return the result body directly if applicable
                 .then((res: any) => {
-                  if (isPlainObject(res) && res["body"] !== undefined) {
+                  if (isPlainObject(res) && res.hasOwnProperty("body")) {
                     return res["body"]
+                  } else {
+                    return res
                   }
                 })
                 // the API errors are not properly formed Error objects

diff --git a/garden-service/src/plugins/kubernetes/run.ts b/garden-service/src/plugins/kubernetes/run.ts
@@ -21,7 +21,7 @@ import { Writable } from "stream"
 import { ChildProcess } from "child_process"
 import { sleep, uniqByName } from "../../util/util"
 import { KubeApi } from "./api"
-import { checkPodStatus } from "./status/pod"
+import { checkPodStatus, getPodLogs } from "./status/pod"
 import { KubernetesServerResource } from "./types"
 import { ServiceState } from "../../types/service"
 import { RunModuleParams } from "../../types/plugin/module/runModule"
@@ -109,8 +109,9 @@ export async function runAndCopy({
       )
     }
 
-    // We start the container and let it run while we execute the target command, and then copy the artifacts
-    spec.containers[0].command = ["sh", "-c", "sleep 86400"]
+    // We start the container with a named pipe and tail that, to get the logs from the actual command
+    // we plan on running. Then we sleep, so that we can copy files out of the container.
+    spec.containers[0].command = ["sh", "-c", "mkfifo /tmp/output && tail /tmp/output && sleep 86400"]
   } else {
     if (args) {
       spec.containers[0].args = args
@@ -154,14 +155,7 @@ export async function runAndCopy({
         // Specifically look for error indicating `sh` is missing, and report with helpful message.
         const containerStatus = pod!.status.containerStatuses![0]
 
-        // FIXME: use optional chaining when TS 3.7 is out
-        if (
-          containerStatus &&
-          containerStatus.state &&
-          containerStatus.state.terminated &&
-          containerStatus.state.terminated.message &&
-          containerStatus.state.terminated.message.includes("not found")
-        ) {
+        if (containerStatus?.state?.terminated?.message?.includes("not found")) {
           throw new ConfigurationError(
             deline`
               ${description} specifies artifacts to export, but the image doesn't
@@ -179,15 +173,25 @@ export async function runAndCopy({
       try {
         await runner.exec({
           command: ["sh", "-c", "tar --help"],
-          container: spec.containers[0].name,
+          container: mainContainerName,
           ignoreError: false,
           log,
           stdout,
           stderr,
         })
       } catch (err) {
-        // TODO: fall back to copying `arc` into the container and using that
-        // (tar is not static so we can't copy that directly)
+        // TODO: fall back to copying `arc` (https://github.com/mholt/archiver) or similar into the container and
+        // using that (tar is not statically compiled so we can't copy that directly). Keeping this snippet around
+        // for that:
+        // await runner.exec({
+        //   command: ["sh", "-c", `sed -n 'w ${arcPath}'; chmod +x ${arcPath}`],
+        //   container: containerName,
+        //   ignoreError: false,
+        //   input: <binary>,
+        //   log,
+        //   stdout,
+        //   stderr,
+        // })
         throw new ConfigurationError(
           deline`
           ${description} specifies artifacts to export, but the image doesn't
@@ -197,15 +201,24 @@ export async function runAndCopy({
         )
       }
 
+      // Escape the command, so that we can safely pass it as a single string
+      const cmd = [...command!, ...(args || [])].map((s) => JSON.stringify(s)).join(" ")
+
       result = await runner.exec({
-        command: [...command!, ...(args || [])],
-        container: spec.containers[0].name,
+        // Pipe the output from the command to the /tmp/output pipe, including stderr. Some shell voodoo happening here,
+        // but this was the only working approach I could find after a lot of trial and error.
+        command: ["sh", "-c", `echo $(${cmd}) >>/tmp/output 2>&1`],
+        container: mainContainerName,
         ignoreError: true,
         log,
         stdout,
         stderr,
       })
 
+      // Need to retrieve the logs explicitly, because kubectl exec sometimes fails to capture them
+      const containerLogs = await getPodLogs({ api, namespace, podName, containerNames: [mainContainerName] })
+      result.log = containerLogs[0].log
+
       // Copy the artifacts
       await Promise.all(
         artifacts.map(async (artifact) => {
@@ -264,15 +277,30 @@ export async function runAndCopy({
       )
     } finally {
       await tmpDir.cleanup()
-      await runner.stop({ log })
+      await runner.stop()
     }
   } else {
     result = await runner.startAndWait({
       interactive,
       ignoreError: !!ignoreError,
       log,
+      remove: false,
       timeout,
     })
+
+    try {
+      // Need to retrieve the logs explicitly, because kubectl run sometimes fails to capture them
+      const containerLogs = await getPodLogs({ api, namespace, podName, containerNames: [mainContainerName] })
+
+      // Keep the existing logs if no logs come back from the API. This may happen if the Pod failed to start
+      // altogether.
+      if (containerLogs[0].log) {
+        result.log = containerLogs[0].log
+      }
+    } finally {
+      // Make sure Pod is cleaned up
+      await runner.stop()
+    }
   }
 
   return result
@@ -289,12 +317,9 @@ class PodRunnerParams {
   spec: V1PodSpec
 }
 
-interface StopParams {
-  log: LogEntry
-}
-
 interface StartParams {
   ignoreError?: boolean
+  input?: Buffer | string
   log: LogEntry
   stdout?: Writable
   stderr?: Writable
@@ -309,6 +334,7 @@ type ExecParams = StartParams & {
 
 type StartAndWaitParams = StartParams & {
   interactive: boolean
+  remove?: boolean
 }
 
 class PodRunnerError extends GardenBaseError {
@@ -341,6 +367,7 @@ export class PodRunner extends PodRunnerParams {
     ignoreError,
     interactive,
     stdout: outputStream,
+    remove = true,
     timeout,
   }: StartAndWaitParams): Promise<RunResult> {
     const { module, spec } = this
@@ -351,7 +378,11 @@ export class PodRunner extends PodRunnerParams {
       spec.containers[0].tty = true
     }
 
-    const kubecmd = [...this.getBaseRunArgs(), "--rm", interactive ? "--tty" : "--quiet"]
+    const kubecmd = [...this.getBaseRunArgs(), interactive ? "--tty" : "--quiet"]
+
+    if (remove) {
+      kubecmd.push("--rm")
+    }
 
     const command = [...(spec.containers[0].command || []), ...(spec.containers[0].args || [])]
     log.verbose(`Running '${command.join(" ")}' in Pod ${this.podName}`)
@@ -460,7 +491,7 @@ export class PodRunner extends PodRunnerParams {
    * Executes a command in the running Pod. Must be called after `start()`.
    */
   async exec(params: ExecParams) {
-    const { log, command, container, ignoreError, stdout, stderr, timeout } = params
+    const { log, command, container, ignoreError, input, stdout, stderr, timeout } = params
 
     if (!this.proc) {
       throw new PodRunnerError(`Attempting to exec a command in Pod before starting it`, { command })
@@ -475,6 +506,7 @@ export class PodRunner extends PodRunnerParams {
       args,
       namespace: this.namespace,
       ignoreError,
+      input,
       log,
       provider: this.provider,
       stdout,
@@ -497,21 +529,18 @@ export class PodRunner extends PodRunnerParams {
    * Disconnects from a connected Pod (if any) and removes it from the cluster. You can safely call this even
    * if the process is no longer active.
    */
-  async stop({ log }: StopParams) {
+  async stop() {
     if (this.proc) {
       delete this.proc
     }
 
-    // TODO: use API
-    const args = ["delete", "pod", this.podName, "--ignore-not-found=true", "--wait=false"]
-
-    await kubectl.exec({
-      args,
-      ignoreError: true,
-      log,
-      namespace: this.namespace,
-      provider: this.provider,
-    })
+    try {
+      await this.api.core.deleteNamespacedPod(this.podName, this.namespace, undefined, undefined, 0)
+    } catch (err) {
+      if (err.code !== 404) {
+        throw err
+      }
+    }
   }
 
   private getBaseRunArgs() {

diff --git a/garden-service/src/plugins/kubernetes/status/pod.ts b/garden-service/src/plugins/kubernetes/status/pod.ts
@@ -57,51 +57,88 @@ export function checkWorkloadPodStatus(
   return { state: combineStates(pods.map(checkPodStatus)), resource }
 }
 
-/**
- * Get a formatted list of log tails for each of the specified pods. Used for debugging and error logs.
- */
-export async function getPodLogs(api: KubeApi, namespace: string, podNames: string[]): Promise<string> {
-  const allLogs = await Bluebird.map(podNames, async (name) => {
-    let containerName: string | undefined
+export async function getPodLogs({
+  api,
+  namespace,
+  podName,
+  containerNames,
+  byteLimit,
+  lineLimit,
+}: {
+  api: KubeApi
+  namespace: string
+  podName: string
+  containerNames?: string[]
+  byteLimit?: number
+  lineLimit?: number
+}) {
+  let podRes: V1Pod
 
-    try {
-      const podRes = await api.core.readNamespacedPod(name, namespace)
-      const containerNames = podRes.spec.containers.map((c) => c.name)
-      if (containerNames.length > 1) {
-        containerName = containerNames.filter((n) => !n.match(/garden-/))[0] || containerNames[0]
-      } else {
-        containerName = containerNames[0]
-      }
-    } catch (err) {
-      if (err.code === 404) {
-        return ""
-      } else {
-        throw err
-      }
+  try {
+    podRes = await api.core.readNamespacedPod(podName, namespace)
+  } catch (err) {
+    if (err.code === 404) {
+      return []
+    } else {
+      throw err
     }
+  }
+
+  let podContainers = podRes.spec!.containers.map((c) => c.name).filter((n) => !n.match(/garden-/))
+
+  if (containerNames) {
+    podContainers = podContainers.filter((name) => containerNames.includes(name))
+  }
+
+  return Bluebird.map(podContainers, async (containerName) => {
+    let log = ""
 
-    // Putting 5000 bytes as a length limit in addition to the line limit, just as a precaution in case someone
-    // accidentally logs a binary file or something.
     try {
-      const log = await api.core.readNamespacedPodLog(
-        name,
+      log = await api.core.readNamespacedPodLog(
+        podName,
         namespace,
         containerName,
-        false,
-        5000,
-        undefined,
-        false,
-        undefined,
-        podLogLines
+        false, // follow
+        byteLimit,
+        undefined, // pretty
+        false, // previous
+        undefined, // sinceSeconds
+        lineLimit
       )
-      return log ? chalk.blueBright(`\n****** ${name} ******\n`) + log : ""
     } catch (err) {
       if (err instanceof KubernetesError && err.message.includes("waiting to start")) {
-        return ""
+        log = ""
       } else {
         throw err
       }
     }
+
+    // the API returns undefined if no logs have been output, for some reason
+    return { containerName, log: log || "" }
+  })
+}
+
+/**
+ * Get a formatted list of log tails for each of the specified pods. Used for debugging and error logs.
+ */
+export async function getFormattedPodLogs(api: KubeApi, namespace: string, podNames: string[]): Promise<string> {
+  const allLogs = await Bluebird.map(podNames, async (podName) => {
+    return {
+      podName,
+      // Putting 5000 bytes as a length limit in addition to the line limit, just as a precaution in case someone
+      // accidentally logs a binary file or something.
+      containers: await getPodLogs({ api, namespace, podName, byteLimit: 5000, lineLimit: podLogLines }),
+    }
   })
-  return allLogs.filter((l) => l !== "").join("\n\n")
+
+  return allLogs
+    .map(({ podName, containers }) => {
+      return (
+        chalk.blueBright(`\n****** ${podName} ******\n`) +
+        containers.map(({ containerName, log }) => {
+          return chalk.gray(`------ ${containerName} ------`) + (log || "<no logs>")
+        })
+      )
+    })
+    .join("\n\n")
 }
diff --git a/garden-service/src/plugins/kubernetes/status/workload.ts b/garden-service/src/plugins/kubernetes/status/workload.ts
@@ -21,7 +21,7 @@ import {
 } from "@kubernetes/client-node"
 import dedent = require("dedent")
 import { getCurrentWorkloadPods } from "../util"
-import { getPodLogs, podLogLines } from "./pod"
+import { getFormattedPodLogs, podLogLines } from "./pod"
 import { ResourceStatus, StatusHandlerParams } from "./status"
 import { getResourceEvents } from "./events"
 
@@ -83,7 +83,7 @@ export async function checkWorkloadStatus({ api, namespace, resource }: StatusHa
 
     // Attach pod logs for debug output
     const podNames = (await getPods()).map((pod) => pod.metadata.name)
-    const podLogs = (await getPodLogs(api, namespace, podNames)) || undefined
+    const podLogs = (await getFormattedPodLogs(api, namespace, podNames)) || undefined
 
     if (podLogs) {
       logs += chalk.white("\n\n━━━ Pod logs ━━━\n")

diff --git a/garden-service/test/data/test-projects/container/missing-sh/garden.yml b/garden-service/test/data/test-projects/container/missing-sh/garden.yml
@@ -4,11 +4,11 @@ description: Test module for attempting to copy artifacts without sh present in
 type: container
 tasks:
   - name: missing-sh-task
-    command: [sh, -c, "touch /task-a.txt"]
+    command: [sh, -c, "touch /task-a.txt && echo ok"]
     artifacts:
       - source: /task-a.txt
 tests:
   - name: missing-sh-test
-    command: [sh, -c, "touch /test-a.txt"]
+    command: [sh, -c, "touch /test-a.txt && echo ok"]
     artifacts:
       - source: /test-a.txt
diff --git a/garden-service/test/data/test-projects/container/missing-tar/garden.yml b/garden-service/test/data/test-projects/container/missing-tar/garden.yml
@@ -4,11 +4,11 @@ description: Test module for attempting to copy artifacts without tar binary in
 type: container
 tasks:
   - name: missing-tar-task
-    command: [sh, -c, "touch /task-a.txt"]
+    command: [sh, -c, "touch /task-a.txt && echo ok"]
     artifacts:
       - source: /task-a.txt
 tests:
   - name: missing-tar-test
-    command: [sh, -c, "touch /test-a.txt"]
+    command: [sh, -c, "touch /test-a.txt && echo ok"]
     artifacts:
       - source: /test-a.txt