From be816791cb338e4b48e634ccea2b4be022036568 Mon Sep 17 00:00:00 2001 From: swist Date: Thu, 18 Jun 2020 18:07:36 +0100 Subject: [PATCH] fix(build-sync): randomly choose pod for rsync Turns out the pod selection was not random enough in our cluster - it always picked the same pod for us. This ensures that we do not pick the same pod over and over again if the k8s API ends up returning the pods in the same order. --- .../commands/cleanup-cluster-registry.ts | 36 +++++++------------ .../src/plugins/kubernetes/container/build.ts | 25 +++++++------ 2 files changed, 24 insertions(+), 37 deletions(-) diff --git a/garden-service/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts b/garden-service/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts index f5182b5226..4c7e184bbd 100644 --- a/garden-service/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts +++ b/garden-service/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts @@ -31,7 +31,7 @@ import { waitForResources } from "../status/status" import { execInWorkload } from "../container/exec" import { dedent, deline } from "../../../util/string" import { execInPod, getDeploymentPodName, BuilderExecParams, buildSyncDeploymentName } from "../container/build" -import { getPods } from "../util" +import { getRunningPodInDeployment } from "../util" import { getSystemNamespace } from "../namespace" const workspaceSyncDirTtl = 0.5 * 86400 // 2 days @@ -388,14 +388,22 @@ async function cleanupBuildSyncVolume(provider: KubernetesProvider, log: LogEntr status: "active", }) - const podName = await getBuildSyncPodName(provider, log) + const pod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log) + const systemNamespace = await getSystemNamespace(provider, log) + if (!pod) { + throw new PluginError(`Could not find running image builder`, { + builderDeploymentName: buildSyncDeploymentName, + systemNamespace, + }) + } + const statArgs = ["sh", "-c", 'stat /data/* -c "%n %X"'] const stat = await 
execInBuildSync({ provider, log, args: statArgs, timeout: 30, - podName, + podName: pod.metadata.name, containerName: dockerDaemonContainerName, }) @@ -422,33 +430,13 @@ async function cleanupBuildSyncVolume(provider: KubernetesProvider, log: LogEntr log, args: deleteArgs, timeout: 300, - podName, + podName: pod.metadata.name, containerName: dockerDaemonContainerName, }) log.setSuccess() } -// Returns the name for one of the build-sync pods in the cluster -// (doesn't matter which one, they all use the same volume) -async function getBuildSyncPodName(provider: KubernetesProvider, log: LogEntry) { - const api = await KubeApi.factory(log, provider) - const systemNamespace = await getSystemNamespace(provider, log) - - const builderStatusRes = await api.apps.readNamespacedDeployment(buildSyncDeploymentName, systemNamespace) - const builderPods = await getPods(api, systemNamespace, builderStatusRes.spec.selector.matchLabels) - const pod = builderPods[0] - - if (!pod) { - throw new PluginError(`Could not find running image builder`, { - builderDeploymentName: buildSyncDeploymentName, - systemNamespace, - }) - } - - return builderPods[0].metadata.name -} - async function execInBuildSync({ provider, log, args, timeout, podName }: BuilderExecParams) { const execCmd = ["exec", "-i", podName, "--", ...args] const systemNamespace = await getSystemNamespace(provider, log) diff --git a/garden-service/src/plugins/kubernetes/container/build.ts b/garden-service/src/plugins/kubernetes/container/build.ts index 460381c4ee..bb2c72d415 100644 --- a/garden-service/src/plugins/kubernetes/container/build.ts +++ b/garden-service/src/plugins/kubernetes/container/build.ts @@ -222,6 +222,17 @@ const remoteBuild: BuildHandler = async (params) => { return {} } + const buildSyncPod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log) + + // TODO: remove this after a few releases (from 0.10.15), since this is only necessary for environments initialized + // with 0.10.14 or 
earlier. + if (!buildSyncPod) { + throw new PluginError(`Could not find running build sync Pod`, { + deploymentName: buildSyncDeploymentName, + systemNamespace, + }) + } + // Sync the build context to the remote sync service // -> Get a tunnel to the service log.setState("Syncing sources to cluster...") @@ -229,7 +240,7 @@ const remoteBuild: BuildHandler = async (params) => { ctx, log, namespace: systemNamespace, - targetResource: `Deployment/${buildSyncDeploymentName}`, + targetResource: `Pod/${buildSyncPod.metadata.name}`, port: RSYNC_PORT, }) @@ -257,18 +268,6 @@ const remoteBuild: BuildHandler = async (params) => { ] log.debug(`Syncing from ${src} to ${destination}`) - - // TODO: remove this after a few releases (from 0.10.15), since this is only necessary for environments initialized - // with 0.10.14 or earlier. - const buildSyncPod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log) - - if (!buildSyncPod) { - throw new PluginError(`Could not find running build sync Pod`, { - deploymentName: buildSyncDeploymentName, - systemNamespace, - }) - } - // We retry a couple of times, because we may get intermittent connection issues or concurrency issues await pRetry(() => exec("rsync", syncArgs), { retries: 3,