From 0b1d7b131df27d57299a28823986375a1ac0c7d7 Mon Sep 17 00:00:00 2001 From: williams-jack Date: Sat, 3 Aug 2024 16:21:34 -0400 Subject: [PATCH 1/3] feat: clean up stale builds after potential crash --- .../controllers/docker-image-controller.ts | 2 +- .../packages/api/src/utils/grader-images.ts | 4 ---- .../common/src/utils/grader-image-exists.ts | 8 +++++++ .../packages/common/src/utils/index.ts | 3 +++ .../clean-up-stale-build-info.ts | 23 +++++++++++++++++++ .../db/src/image-builder-operations/index.ts | 6 +++-- .../packages/image-build-service/src/index.ts | 8 +++++-- 7 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 orchestrator/packages/common/src/utils/grader-image-exists.ts create mode 100644 orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts diff --git a/orchestrator/packages/api/src/controllers/docker-image-controller.ts b/orchestrator/packages/api/src/controllers/docker-image-controller.ts index c16e4e49..a9c8c702 100644 --- a/orchestrator/packages/api/src/controllers/docker-image-controller.ts +++ b/orchestrator/packages/api/src/controllers/docker-image-controller.ts @@ -1,10 +1,10 @@ import { Request, Response } from "express"; import { errorResponse } from "./utils"; import { + graderImageExists, validations, } from "@codegrade-orca/common"; import { GradingQueueOperationException, enqueueImageBuild, imageIsAwaitingBuild, imageIsBeingBuilt } from "@codegrade-orca/db"; -import { graderImageExists } from "../utils/grader-images"; export const createGraderImage = async (req: Request, res: Response) => { if (!validations.graderImageBuildRequest(req.body)) { diff --git a/orchestrator/packages/api/src/utils/grader-images.ts b/orchestrator/packages/api/src/utils/grader-images.ts index ccc2f0c2..60042a78 100644 --- a/orchestrator/packages/api/src/utils/grader-images.ts +++ b/orchestrator/packages/api/src/utils/grader-images.ts @@ -1,6 +1,5 @@ import { getConfig, GradingJobConfig } from "@codegrade-orca/common"; import { execFile } from "child_process"; -import { existsSync } from "fs"; import path = require("path"); const CONFIG = getConfig(); @@ -22,6 +21,3 @@ export const touchGraderImageFile = ({ ); }); }; - -export const graderImageExists = (graderImageSHA: string) => - existsSync(path.join(CONFIG.dockerImageFolder, `${graderImageSHA}.tgz`)); diff --git a/orchestrator/packages/common/src/utils/grader-image-exists.ts b/orchestrator/packages/common/src/utils/grader-image-exists.ts new file mode 100644 index 00000000..c35717e6 --- /dev/null +++ b/orchestrator/packages/common/src/utils/grader-image-exists.ts @@ -0,0 +1,8 @@ +import path from "path"; +import { getConfig } from "../config"; +import { existsSync } from "fs"; + +const graderImageExists = (graderImageSHA: string) => + existsSync(path.join(getConfig().dockerImageFolder, `${graderImageSHA}.tgz`)); + +export default graderImageExists; diff --git a/orchestrator/packages/common/src/utils/index.ts b/orchestrator/packages/common/src/utils/index.ts index cedb287b..b292c183 100644 --- a/orchestrator/packages/common/src/utils/index.ts +++ b/orchestrator/packages/common/src/utils/index.ts @@ -1 +1,4 @@ +import graderImageExists from './grader-image-exists'; + export * from './push-status-update'; +export { graderImageExists }; diff --git a/orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts b/orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts new file mode 100644 index 00000000..10812c22 --- /dev/null +++ b/orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts @@ -0,0 +1,23 @@ +import { graderImageExists } from "@codegrade-orca/common"; +import prismaInstance from "../prisma-instance" +import handleCompletedImageBuild, { EnqueuedJobInfo } from "./handle-completed-image-build" + +const cleanStaleBuildInfo = async (): Promise => + prismaInstance.$transaction(async (tx) => { + const possibleStaleBuildInfo = await tx.imageBuildInfo.findMany({ where: { inProgress: true } }); + if (!possibleStaleBuildInfo.length) { + return []; + } + + return await Promise.all(possibleStaleBuildInfo.map(async (buildInfo) => { + const { dockerfileSHA } = buildInfo; + if (graderImageExists(dockerfileSHA)) { + return await handleCompletedImageBuild(dockerfileSHA, true) as EnqueuedJobInfo[]; + } else { + await tx.imageBuildInfo.update({ where: { dockerfileSHA }, data: { inProgress: false } }); + return []; + } + })).then((lists) => lists.flat()); + }); + +export default cleanStaleBuildInfo; diff --git a/orchestrator/packages/db/src/image-builder-operations/index.ts b/orchestrator/packages/db/src/image-builder-operations/index.ts index 9ec4f5d5..293a1d97 100644 --- a/orchestrator/packages/db/src/image-builder-operations/index.ts +++ b/orchestrator/packages/db/src/image-builder-operations/index.ts @@ -1,6 +1,8 @@ import getNextImageBuild from "./get-next-image-build"; -import handleCompletedImageBuild from "./handle-completed-image-build"; +import handleCompletedImageBuild, { EnqueuedJobInfo, CancelJobInfo } from "./handle-completed-image-build"; +import cleanStaleBuildInfo from "./clean-up-stale-build-info"; -export { getNextImageBuild }; +export { getNextImageBuild, EnqueuedJobInfo, CancelJobInfo }; export { handleCompletedImageBuild }; +export { cleanStaleBuildInfo }; export * from "./image-build-status"; diff --git a/orchestrator/packages/image-build-service/src/index.ts b/orchestrator/packages/image-build-service/src/index.ts index 2218c1b3..7eebb76d 100644 --- a/orchestrator/packages/image-build-service/src/index.ts +++ b/orchestrator/packages/image-build-service/src/index.ts @@ -7,11 +7,16 @@ import { import { getNextImageBuild, handleCompletedImageBuild } from "@codegrade-orca/db"; import { createAndStoreGraderImage, removeStaleImageFiles } from "./process-request"; import { cleanUpDockerFiles, sendJobResultForBuildFail, removeImageFromDockerIfExists, notifyClientOfBuildResult } from "./utils"; -import { EnqueuedJobInfo } from "@codegrade-orca/db/dist/image-builder-operations/handle-completed-image-build"; +import { EnqueuedJobInfo, cleanStaleBuildInfo } from "@codegrade-orca/db"; const LOOP_SLEEP_TIME = 5; // Seconds const main = async () => { + console.info("Cleaning up stale build info..."); + const enqueuedJobs = await cleanStaleBuildInfo(); + await Promise.all(enqueuedJobs.map( + ({ response_url, key, ...status }) => pushStatusUpdate(status, response_url, key) + )); console.info("Build service initialized."); while (true) { let infoAsBuildReq: GraderImageBuildRequest | undefined = undefined; @@ -58,7 +63,6 @@ const main = async () => { } }; - const sleep = (seconds: number): Promise => { return new Promise((resolve) => { setTimeout(() => { From 5b78f408db2867ddfaa0eb2ad0355d8bd3e23615 Mon Sep 17 00:00:00 2001 From: williams-jack Date: Sat, 3 Aug 2024 17:46:17 -0400 Subject: [PATCH 2/3] feat: remove stale build dockerfile and notify client of build --- .../common/src/types/image-build-service.ts | 2 +- ...uild-info.ts => clean-stale-build-info.ts} | 12 +++-- .../db/src/image-builder-operations/index.ts | 2 +- .../packages/image-build-service/src/index.ts | 50 +++++++++++++------ .../packages/image-build-service/src/utils.ts | 12 ++--- 5 files changed, 53 insertions(+), 25 deletions(-) rename orchestrator/packages/db/src/image-builder-operations/{clean-up-stale-build-info.ts => clean-stale-build-info.ts} (55%) diff --git a/orchestrator/packages/common/src/types/image-build-service.ts b/orchestrator/packages/common/src/types/image-build-service.ts index ec0b8085..fcc15137 100644 --- a/orchestrator/packages/common/src/types/image-build-service.ts +++ b/orchestrator/packages/common/src/types/image-build-service.ts @@ -1,6 +1,6 @@ export interface GraderImageBuildResult { was_successful: boolean, - logs: Array + logs: Array } export type ImageBuildStep = "Write request contents to Dockerfile." | diff --git a/orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts b/orchestrator/packages/db/src/image-builder-operations/clean-stale-build-info.ts similarity index 55% rename from orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts rename to orchestrator/packages/db/src/image-builder-operations/clean-stale-build-info.ts index 10812c22..768de0e6 100644 --- a/orchestrator/packages/db/src/image-builder-operations/clean-up-stale-build-info.ts +++ b/orchestrator/packages/db/src/image-builder-operations/clean-stale-build-info.ts @@ -1,8 +1,9 @@ import { graderImageExists } from "@codegrade-orca/common"; +import { GraderImageBuildRequest } from "@codegrade-orca/common"; import prismaInstance from "../prisma-instance" import handleCompletedImageBuild, { EnqueuedJobInfo } from "./handle-completed-image-build" -const cleanStaleBuildInfo = async (): Promise => +const cleanStaleBuildInfo = async (): Promise> => prismaInstance.$transaction(async (tx) => { const possibleStaleBuildInfo = await tx.imageBuildInfo.findMany({ where: { inProgress: true } }); if (!possibleStaleBuildInfo.length) { @@ -12,12 +13,17 @@ const cleanStaleBuildInfo = async (): Promise => return await Promise.all(possibleStaleBuildInfo.map(async (buildInfo) => { const { dockerfileSHA } = buildInfo; if (graderImageExists(dockerfileSHA)) { - return await handleCompletedImageBuild(dockerfileSHA, true) as EnqueuedJobInfo[]; + const originalReq: GraderImageBuildRequest = { + dockerfile_sha_sum: dockerfileSHA, + dockerfile_contents: buildInfo.dockerfileContent, + response_url: buildInfo.responseURL + }; + return [originalReq, await handleCompletedImageBuild(dockerfileSHA, true) as EnqueuedJobInfo[]]; } else { await tx.imageBuildInfo.update({ where: { dockerfileSHA }, data: { inProgress: false } }); return []; } - })).then((lists) => lists.flat()); + })).then((lists) => lists.filter((possiblePair) => possiblePair.length)) as [GraderImageBuildRequest, EnqueuedJobInfo[]][]; }); export default cleanStaleBuildInfo; diff --git a/orchestrator/packages/db/src/image-builder-operations/index.ts b/orchestrator/packages/db/src/image-builder-operations/index.ts index 293a1d97..752a26db 100644 --- a/orchestrator/packages/db/src/image-builder-operations/index.ts +++ b/orchestrator/packages/db/src/image-builder-operations/index.ts @@ -1,6 +1,6 @@ import getNextImageBuild from "./get-next-image-build"; import handleCompletedImageBuild, { EnqueuedJobInfo, CancelJobInfo } from "./handle-completed-image-build"; -import cleanStaleBuildInfo from "./clean-up-stale-build-info"; +import cleanStaleBuildInfo from "./clean-stale-build-info"; export { getNextImageBuild, EnqueuedJobInfo, CancelJobInfo }; export { handleCompletedImageBuild }; diff --git a/orchestrator/packages/image-build-service/src/index.ts b/orchestrator/packages/image-build-service/src/index.ts index 7eebb76d..8a3afd87 100644 --- a/orchestrator/packages/image-build-service/src/index.ts +++ b/orchestrator/packages/image-build-service/src/index.ts @@ -2,21 +2,29 @@ import { GraderImageBuildRequest, toMilliseconds, isImageBuildResult, - pushStatusUpdate + pushStatusUpdate, + getConfig, + GraderImageBuildResult } from "@codegrade-orca/common"; import { getNextImageBuild, handleCompletedImageBuild } from "@codegrade-orca/db"; import { createAndStoreGraderImage, removeStaleImageFiles } from "./process-request"; import { cleanUpDockerFiles, sendJobResultForBuildFail, removeImageFromDockerIfExists, notifyClientOfBuildResult } from "./utils"; import { EnqueuedJobInfo, cleanStaleBuildInfo } from "@codegrade-orca/db"; +import path from "path"; +import { existsSync, rmSync } from "fs"; const LOOP_SLEEP_TIME = 5; // Seconds const main = async () => { console.info("Cleaning up stale build info..."); - const enqueuedJobs = await cleanStaleBuildInfo(); - await Promise.all(enqueuedJobs.map( - ({ response_url, key, ...status }) => pushStatusUpdate(status, response_url, key) - )); + const shaSumJobInfoPairs = await cleanStaleBuildInfo(); + shaSumJobInfoPairs.forEach(([originalReq, enqueuedJobs]) => { + removeDockerfileIfExists(originalReq.dockerfile_sha_sum); + notifyClientOfBuildResult(cleanedImageResult(), originalReq); + enqueuedJobs.forEach( + ({ response_url, key, ...status }) => pushStatusUpdate(status, response_url, key) + ); + }); console.info("Build service initialized."); while (true) { let infoAsBuildReq: GraderImageBuildRequest | undefined = undefined; @@ -29,28 +37,26 @@ const main = async () => { } console.info(`Attempting to build image with SHA ${nextBuildReq.dockerfileSHA}.`); + infoAsBuildReq = { dockerfile_sha_sum: nextBuildReq.dockerfileSHA, dockerfile_contents: nextBuildReq.dockerfileContent, response_url: nextBuildReq.responseURL, }; + const result = await createAndStoreGraderImage(infoAsBuildReq); - // When success is passed as true, we get EnqueuedJobInfo[]. const jobInfo = await handleCompletedImageBuild(nextBuildReq.dockerfileSHA, true) as EnqueuedJobInfo[]; - await notifyClientOfBuildResult(result, infoAsBuildReq); - await Promise.all(jobInfo.map(({ key, response_url, ...status }) => pushStatusUpdate(status, response_url, key))); + + notifyClientOfBuildResult(result, infoAsBuildReq); + jobInfo.forEach(({ key, response_url, ...status }) => pushStatusUpdate(status, response_url, key)); console.info(`Successfully built image with SHA ${nextBuildReq.dockerfileSHA}.`); } catch (err) { if (isImageBuildResult(err) && infoAsBuildReq) { const cancelledJobInfoList = await handleCompletedImageBuild(infoAsBuildReq.dockerfile_sha_sum, false); if (cancelledJobInfoList !== null) { - await Promise.all(cancelledJobInfoList.map((cancelInfo) => { - sendJobResultForBuildFail( - cancelInfo, - ).catch((notifyError) => console.error(notifyError)); // At this point we can't really do anything, but we should at least log out what happened. - })); + cancelledJobInfoList.forEach((cancelInfo) => sendJobResultForBuildFail(cancelInfo)); } - await notifyClientOfBuildResult(err, infoAsBuildReq).catch((notifyError) => console.error(notifyError)); + notifyClientOfBuildResult(err, infoAsBuildReq); await cleanUpDockerFiles(infoAsBuildReq.dockerfile_sha_sum); } console.error(err); @@ -63,6 +69,22 @@ const main = async () => { } }; +const cleanedImageResult = (): GraderImageBuildResult => ({ + was_successful: true, + logs: [ + "This image successfully built but then the system crashed; we have cleaned up extra files and the image can now be used without issue." + ] +}); + +const removeDockerfileIfExists = (dockerfileSHASum: string) => { + const { dockerImageFolder } = getConfig(); + const imagePath = path.join(dockerImageFolder, `${dockerfileSHASum}.Dockerfile}`) + if (!existsSync(imagePath)) { + return; + } + rmSync(imagePath); +} + const sleep = (seconds: number): Promise => { return new Promise((resolve) => { setTimeout(() => { diff --git a/orchestrator/packages/image-build-service/src/utils.ts b/orchestrator/packages/image-build-service/src/utils.ts index 4c7abebb..47fad6c8 100644 --- a/orchestrator/packages/image-build-service/src/utils.ts +++ b/orchestrator/packages/image-build-service/src/utils.ts @@ -47,29 +47,29 @@ const imageExistsInDocker = (dockerfileSHASum: string): Promise => { }); }; -export const sendJobResultForBuildFail = async (cancelInfo: CancelJobInfo) => { +export const sendJobResultForBuildFail = (cancelInfo: CancelJobInfo) => { const result: GradingJobResult = { shell_responses: [], errors: ["The grader image for this job failed to build. Please contact a Professor or Admin."] }; - await fetch(cancelInfo.response_url, { + fetch(cancelInfo.response_url, { method: "POST", headers: { "Accept": "application/json", "Content-Type": "application/json" }, body: JSON.stringify({ ...result, key: cancelInfo.key }) - }); + }).catch((err) => console.error(err)); } -export const notifyClientOfBuildResult = async (result: GraderImageBuildResult, originalReq: GraderImageBuildRequest) => { +export const notifyClientOfBuildResult = (result: GraderImageBuildResult, originalReq: GraderImageBuildRequest) => { const { response_url } = originalReq; - await fetch(response_url, { + fetch(response_url, { method: "POST", headers: { "Accept": "application/json", "Content-Type": "application/json" }, body: JSON.stringify(result) - }); + }).catch((err) => console.error(err)); } From c80aab5d34c63d67f2c2617a9e39b57cee04070a Mon Sep 17 00:00:00 2001 From: williams-jack Date: Sat, 3 Aug 2024 18:01:46 -0400 Subject: [PATCH 3/3] fix: result object should be spread in cancellation --- .../api/src/controllers/grading-queue-controller.ts | 2 +- orchestrator/packages/api/src/controllers/utils.ts | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/orchestrator/packages/api/src/controllers/grading-queue-controller.ts b/orchestrator/packages/api/src/controllers/grading-queue-controller.ts index d77f9c40..3a8c1e30 100644 --- a/orchestrator/packages/api/src/controllers/grading-queue-controller.ts +++ b/orchestrator/packages/api/src/controllers/grading-queue-controller.ts @@ -170,7 +170,7 @@ export const deleteJob = async (req: Request, res: Response) => { } const deletedJob = await deleteJobInQueue(jobID); const deletedJobConfig = deletedJob.config as object as GradingJobConfig; - await notifyClientOfCancelledJob(deletedJobConfig) + notifyClientOfCancelledJob(deletedJobConfig) return res.status(200).json({ message: "OK" }); } catch (err) { if (err instanceof GradingQueueOperationException) { diff --git a/orchestrator/packages/api/src/controllers/utils.ts b/orchestrator/packages/api/src/controllers/utils.ts index fcf75b3a..169f7f32 100644 --- a/orchestrator/packages/api/src/controllers/utils.ts +++ b/orchestrator/packages/api/src/controllers/utils.ts @@ -9,18 +9,19 @@ export const errorResponse = ( return res.status(status).json({ errors: errors }); }; -export const notifyClientOfCancelledJob = async (jobConfig: GradingJobConfig) => { +export const notifyClientOfCancelledJob = (jobConfig: GradingJobConfig) => { const result: GradingJobResult = { shell_responses: [], errors: ["Job cancelled by a course professor or Orca admin."] }; - await fetch(jobConfig.response_url, { + console.info(jobConfig.response_url); + fetch(jobConfig.response_url, { method: "POST", headers: { "Accept": "application/json", "Content-Type": "application/json" }, - body: JSON.stringify({ result, key: jobConfig.key }) + body: JSON.stringify({ ...result, key: jobConfig.key }) }).catch((err) => console.error( `Encountered the following error while attempting to notify client of Job cancellation: ${err}`