Skip to content

Commit

Permalink
fix(k8s): failures when publishing images from external registries
Browse files Browse the repository at this point in the history
Two issues fixed:

1. Resource contention due to sharing a container for image pulls. We
   now start a Pod for each pull.
2. Incorrect command used for loading the extracted image archive. This
   would _sometimes_ work correctly, but not reliably.
  • Loading branch information
edvald authored and thsig committed Mar 10, 2021
1 parent 5ac7822 commit 63a993a
Showing 5 changed files with 122 additions and 69 deletions.
6 changes: 6 additions & 0 deletions core/src/plugins/container/helpers.ts
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ import titleize from "titleize"
import { deline, stripQuotes } from "../../util/string"
import { PluginContext } from "../../plugin-context"
import { ModuleVersion } from "../../vcs/vcs"
import { SpawnParams } from "../../util/ext-tools"

interface DockerVersion {
client?: string
@@ -325,6 +326,11 @@ const helpers = {
}
},

spawnDockerCli(params: SpawnParams & { ctx: PluginContext }) {
const docker = params.ctx.tools["container.docker"]
return docker.spawn(params)
},

hasDockerfile(config: ContainerModuleConfig, version: ModuleVersion): boolean {
// If we explicitly set a Dockerfile, we take that to mean you want it to be built.
// If the file turns out to be missing, this will come up in the build handler.
131 changes: 78 additions & 53 deletions core/src/plugins/kubernetes/commands/pull-image.ts
Original file line number Diff line number Diff line change
@@ -20,12 +20,14 @@ import { LogEntry } from "../../../logger/log-entry"
import { containerHelpers } from "../../container/helpers"
import { RuntimeError } from "../../../exceptions"
import { PodRunner } from "../run"
import { inClusterRegistryHostname, gardenUtilDaemonDeploymentName } from "../constants"
import { dockerAuthSecretKey, dockerAuthSecretName, inClusterRegistryHostname, k8sUtilImageName } from "../constants"
import { getAppNamespace, getSystemNamespace } from "../namespace"
import { getDeploymentPod } from "../util"
import { getRegistryPortForward } from "../container/util"
import { randomString } from "../../../util/string"
import { buildkitAuthSecretName, ensureBuilderSecret } from "../container/build/buildkit"
import { PluginContext } from "../../../plugin-context"
import { buildkitDeploymentName } from "../container/build/buildkit"

const tmpTarPath = "/tmp/image.tar"

export const pullImage: PluginCommand = {
name: "pull-image",
@@ -143,97 +145,120 @@ async function pullFromExternalRegistry(
const buildMode = ctx.provider.config.buildMode

let namespace: string
let deploymentName: string
let authSecretName: string

if (buildMode === "cluster-buildkit") {
namespace = await getAppNamespace(ctx, log, ctx.provider)
deploymentName = buildkitDeploymentName
authSecretName = buildkitAuthSecretName

await ensureBuilderSecret({
provider: ctx.provider,
log,
api,
namespace,
waitForUpdate: false,
})
} else {
namespace = await getSystemNamespace(ctx, ctx.provider, log)
deploymentName = gardenUtilDaemonDeploymentName
authSecretName = dockerAuthSecretName
}

const imageId = containerHelpers.getDeploymentImageId(module, module.version, ctx.provider.config.deploymentRegistry)
const tarName = `/tmp/${module.name}-${module.version.versionString}`

// See https://github.com/containers/skopeo for how all this works and the syntax
const skopeoCommand = [
"skopeo",
"--command-timeout=300s",
"--insecure-policy",
"copy",
"--quiet",
`docker://${imageId}`,
`docker-archive:${tarName}`,
`docker-archive:${tmpTarPath}:${localId}`,
]

const pod = await getDeploymentPod({
api,
deploymentName,
namespace,
})
const runner = new PodRunner({
api,
ctx,
provider: ctx.provider,
namespace,
pod,
pod: {
apiVersion: "v1",
kind: "Pod",
metadata: {
name: `pull-image-${randomString(8)}`,
namespace,
},
spec: {
containers: [
{
name: "main",
image: k8sUtilImageName,
command: ["sleep", "360"],
volumeMounts: [
{
name: authSecretName,
mountPath: "/home/user/.docker",
readOnly: true,
},
],
},
],
volumes: [
{
name: authSecretName,
secret: {
secretName: authSecretName,
items: [{ key: dockerAuthSecretKey, path: "config.json" }],
},
},
],
},
},
})

await runner.exec({
command: ["sh", "-c", skopeoCommand.join(" ")],
containerName: "util",
log,
timeoutSec: 60 * 1000 * 5, // 5 minutes,
})
log.debug(`Pulling image ${imageId} from registry to local docker`)

try {
await importImage({ module, runner, tarName, imageId, log, ctx })
await containerHelpers.dockerCli({ cwd: module.buildPath, args: ["tag", imageId, localId], log, ctx })
await runner.start({ log })

await runner.exec({
log,
command: skopeoCommand,
tty: false,
timeoutSec: 60 * 1000 * 5, // 5 minutes,
buffer: true,
})

log.debug(`Loading image to local docker with ID ${localId}`)
await loadImage({ ctx, runner, log })
} catch (err) {
throw new RuntimeError(`Failed pulling image for module ${module.name} with image id ${imageId}: ${err}`, {
throw new RuntimeError(`Failed pulling image ${imageId}: ${err.message}`, {
err,
imageId,
localId,
})
} finally {
try {
await runner.exec({
command: ["rm", "-rf", tarName],
containerName: "util",
log,
})
} catch (err) {
log.warn("Failed cleaning up temporary file: " + err.message)
}
await runner.stop()
}
}

async function importImage({
module,
runner,
tarName,
imageId,
log,
ctx,
}: {
module: GardenModule
runner: PodRunner
tarName: string
imageId: string
log: LogEntry
ctx: PluginContext
}) {
const getOutputCommand = ["cat", tarName]

async function loadImage({ ctx, runner, log }: { ctx: PluginContext; runner: PodRunner; log: LogEntry }) {
await tmp.withFile(async ({ path }) => {
let writeStream = fs.createWriteStream(path)

await runner.exec({
command: getOutputCommand,
containerName: "util",
command: ["cat", tmpTarPath],
containerName: "main",
log,
stdout: writeStream,
buffer: false,
})

const args = ["import", path, imageId]
await containerHelpers.dockerCli({ cwd: module.buildPath, args, log, ctx })
await containerHelpers.dockerCli({
ctx,
cwd: "/tmp",
args: ["load", "-i", path],
log,
})
})
}
48 changes: 35 additions & 13 deletions core/src/plugins/kubernetes/container/build/buildkit.ts
Original file line number Diff line number Diff line change
@@ -197,19 +197,13 @@ export async function ensureBuildkit({
const manifest = getBuildkitDeployment(provider)
const status = await compareDeployedResources(ctx as KubernetesPluginContext, api, namespace, [manifest], deployLog)

// Ensure docker auth secret is available and up-to-date in the namespace
const authSecret = await prepareDockerAuth(api, provider, namespace)
authSecret.metadata.name = buildkitAuthSecretName
const existingSecret = await api.readOrNull({ log: deployLog, namespace, manifest: authSecret })

if (!existingSecret || authSecret.data?.[dockerAuthSecretKey] !== existingSecret.data?.[dockerAuthSecretKey]) {
deployLog.setState(chalk.gray(`-> Updating Docker auth secret in namespace ${namespace}`))
await api.upsert({ kind: "Secret", namespace, log: deployLog, obj: authSecret })
// Need to wait a little to ensure the secret is updated in the buildkit deployment
if (status.state === "ready") {
await sleep(5)
}
}
await ensureBuilderSecret({
provider,
log,
api,
namespace,
waitForUpdate: status.state === "ready",
})

if (status.state === "ready") {
return false
@@ -237,6 +231,34 @@ export async function ensureBuildkit({
})
}

export async function ensureBuilderSecret({
provider,
log,
api,
namespace,
waitForUpdate,
}: {
provider: KubernetesProvider
log: LogEntry
api: KubeApi
namespace: string
waitForUpdate: boolean
}) {
// Ensure docker auth secret is available and up-to-date in the namespace
const authSecret = await prepareDockerAuth(api, provider, namespace)
authSecret.metadata.name = buildkitAuthSecretName
const existingSecret = await api.readOrNull({ log, namespace, manifest: authSecret })

if (!existingSecret || authSecret.data?.[dockerAuthSecretKey] !== existingSecret.data?.[dockerAuthSecretKey]) {
log.setState(chalk.gray(`-> Updating Docker auth secret in namespace ${namespace}`))
await api.upsert({ kind: "Secret", namespace, log, obj: authSecret })
// Need to wait a little to ensure the secret is updated in the buildkit deployment
if (waitForUpdate) {
await sleep(5)
}
}
}

export function getBuildkitFlags(module: ContainerModule) {
const args: string[] = []

2 changes: 1 addition & 1 deletion core/src/plugins/kubernetes/run.ts
Original file line number Diff line number Diff line change
@@ -488,7 +488,7 @@ async function runWithArtifacts({
stderr,
// Anything above two minutes for this would be unusual
timeoutSec: 120,
buffer: false,
buffer: true,
})
} catch (err) {
// TODO: fall back to copying `arc` (https://github.com/mholt/archiver) or similar into the container and
4 changes: 2 additions & 2 deletions core/test/integ/src/plugins/kubernetes/commands/pull-image.ts
Original file line number Diff line number Diff line change
@@ -55,12 +55,12 @@ describe("pull-image plugin command", () => {
const imageId = containerHelpers.getLocalImageId(module, module.version)
const imageHash = await containerHelpers.dockerCli({
cwd: module.buildPath,
args: ["images", "-q", imageId],
args: ["run", imageId, "echo", "ok"],
log: garden.log,
ctx,
})

expect(imageHash.stdout.length).to.be.greaterThan(0)
expect(imageHash.stdout.trim()).to.equal("ok")
}

grouped("cluster-docker", "remote-only").context("using an external cluster registry", () => {

0 comments on commit 63a993a

Please sign in to comment.