Skip to content

Commit

Permalink
fix(k8s): fix some issues with the cleanup-cluster-registry command
Browse files Browse the repository at this point in the history
Added some error handling, extended timeouts, set concurrency limits
and added batching for some potentially large requests.
  • Loading branch information
edvald committed Sep 8, 2020
1 parent 3b24339 commit 56d1a2f
Showing 1 changed file with 95 additions and 83 deletions.
178 changes: 95 additions & 83 deletions core/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,28 +187,32 @@ async function deleteImagesFromRegistry(ctx: KubernetesPluginContext, log: LogEn
status: "active",
})

await Bluebird.map(images, async (image) => {
try {
// Get the digest for the image
const [name, tag] = splitLast(image, ":")
const res = await queryRegistry(ctx, log, `${name}/manifests/${tag}`, {
method: "HEAD",
headers: {
Accept: "application/vnd.docker.distribution.manifest.v2+json",
},
})
const digest = res.headers["docker-content-digest"]
await Bluebird.map(
images,
async (image) => {
try {
// Get the digest for the image
const [name, tag] = splitLast(image, ":")
const res = await queryRegistry(ctx, log, `${name}/manifests/${tag}`, {
method: "HEAD",
headers: {
Accept: "application/vnd.docker.distribution.manifest.v2+json",
},
})
const digest = res.headers["docker-content-digest"]

// Issue the delete request
await queryRegistry(ctx, log, `${name}/manifests/${digest}`, {
method: "DELETE",
})
} catch (err) {
if (err.response?.statusCode !== 404) {
throw err
// Issue the delete request
await queryRegistry(ctx, log, `${name}/manifests/${digest}`, {
method: "DELETE",
})
} catch (err) {
if (err.response?.statusCode !== 404) {
throw err
}
}
}
})
},
{ concurrency: 100 }
)

log.info(`Flagged ${images.length} images as deleted in the registry.`)
log.setSuccess()
Expand Down Expand Up @@ -247,64 +251,67 @@ async function runRegistryGarbageCollection(ctx: KubernetesPluginContext, api: K
})
delete modifiedDeployment.status

await apply({
ctx,
log,
provider,
manifests: [modifiedDeployment],
namespace: systemNamespace,
})

// -> Wait for registry to be up again
await waitForResources({
namespace: systemNamespace,
ctx,
provider,
log,
serviceName: "docker-registry",
resources: [modifiedDeployment],
})

// Run garbage collection
log.info("Running garbage collection...")
await execInWorkload({
ctx,
provider,
log,
namespace: systemNamespace,
workload: modifiedDeployment,
command: ["/bin/registry", "garbage-collect", "/etc/docker/registry/config.yml"],
interactive: false,
})

// Restart the registry again as normal
log.info("Restarting without read-only mode...")
try {
await apply({
ctx,
log,
provider,
manifests: [modifiedDeployment],
namespace: systemNamespace,
})

// -> Re-apply the original deployment
registryDeployment = await api.apps.readNamespacedDeployment(CLUSTER_REGISTRY_DEPLOYMENT_NAME, systemNamespace)
const writableRegistry = sanitizeResource(registryDeployment)
// -> Remove the maintenance flag
writableRegistry.spec.template.spec!.containers[0].env =
writableRegistry.spec?.template.spec?.containers[0].env?.filter((e) => e.name !== "REGISTRY_STORAGE_MAINTENANCE") ||
[]
// -> Wait for registry to be up again
await waitForResources({
namespace: systemNamespace,
ctx,
provider,
log,
serviceName: "docker-registry",
resources: [modifiedDeployment],
})

await apply({
ctx,
log,
provider,
manifests: [writableRegistry],
namespace: systemNamespace,
})
// Run garbage collection
log.info("Running garbage collection...")
await execInWorkload({
ctx,
provider,
log,
namespace: systemNamespace,
workload: modifiedDeployment,
command: ["/bin/registry", "garbage-collect", "/etc/docker/registry/config.yml"],
interactive: false,
})
} finally {
// Restart the registry again as normal
log.info("Restarting without read-only mode...")

// -> Re-apply the original deployment
registryDeployment = await api.apps.readNamespacedDeployment(CLUSTER_REGISTRY_DEPLOYMENT_NAME, systemNamespace)
const writableRegistry = sanitizeResource(registryDeployment)
// -> Remove the maintenance flag
writableRegistry.spec.template.spec!.containers[0].env =
writableRegistry.spec?.template.spec?.containers[0].env?.filter(
(e) => e.name !== "REGISTRY_STORAGE_MAINTENANCE"
) || []

await apply({
ctx,
log,
provider,
manifests: [writableRegistry],
namespace: systemNamespace,
})

// -> Wait for registry to be up again
await waitForResources({
namespace: systemNamespace,
ctx,
provider,
log,
serviceName: "docker-registry",
resources: [modifiedDeployment],
})
// -> Wait for registry to be up again
await waitForResources({
namespace: systemNamespace,
ctx,
provider,
log,
serviceName: "docker-registry",
resources: [modifiedDeployment],
})
}

log.info(`Completed registry garbage collection.`)
log.setSuccess()
Expand Down Expand Up @@ -378,7 +385,7 @@ async function deleteImagesFromDaemon({
log,
command: ["docker", "rmi", ...images],
containerName: dockerDaemonContainerName,
timeoutSec: 300,
timeoutSec: 600,
})
log.setState(deline`
Deleting images:
Expand All @@ -395,7 +402,7 @@ async function deleteImagesFromDaemon({
log,
command: ["docker", "image", "prune", "-f"],
containerName: dockerDaemonContainerName,
timeoutSec: 300,
timeoutSec: 1000,
})

log.setSuccess()
Expand Down Expand Up @@ -432,7 +439,7 @@ async function cleanupBuildSyncVolume({
const stat = await runner.exec({
log,
command: ["sh", "-c", 'stat /data/* -c "%n %X"'],
timeoutSec: 30,
timeoutSec: 300,
})

// Remove directories last accessed more than workspaceSyncDirTtl ago
Expand All @@ -453,11 +460,16 @@ async function cleanupBuildSyncVolume({
// Delete the directories
log.info(`Deleting ${dirsToDelete.length} workspace directories.`)

await runner.exec({
log,
command: ["rm", "-rf", ...dirsToDelete],
timeoutSec: 30,
})
// Delete the directories in batches of up to 100 paths per `rm` invocation,
// running at most 20 invocations concurrently.
await Bluebird.map(
  chunk(dirsToDelete, 100),
  // Each invocation must receive only its own batch. Spreading the full
  // `dirsToDelete` list here would defeat the batching and make every
  // concurrent call try to remove the same (complete) set of paths.
  (batch) =>
    runner.exec({
      log,
      command: ["rm", "-rf", ...batch],
      timeoutSec: 300,
    }),
  { concurrency: 20 }
)

log.setSuccess()
}

0 comments on commit 56d1a2f

Please sign in to comment.