Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: clean up stale builds after potential crash #62

Merged
merged 3 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { Request, Response } from "express";
import { errorResponse } from "./utils";
import {
graderImageExists,
validations,
} from "@codegrade-orca/common";
import { GradingQueueOperationException, enqueueImageBuild, imageIsAwaitingBuild, imageIsBeingBuilt } from "@codegrade-orca/db";
import { graderImageExists } from "../utils/grader-images";

export const createGraderImage = async (req: Request, res: Response) => {
if (!validations.graderImageBuildRequest(req.body)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ export const deleteJob = async (req: Request, res: Response) => {
}
const deletedJob = await deleteJobInQueue(jobID);
const deletedJobConfig = deletedJob.config as object as GradingJobConfig;
await notifyClientOfCancelledJob(deletedJobConfig)
notifyClientOfCancelledJob(deletedJobConfig)
return res.status(200).json({ message: "OK" });
} catch (err) {
if (err instanceof GradingQueueOperationException) {
Expand Down
7 changes: 4 additions & 3 deletions orchestrator/packages/api/src/controllers/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,19 @@ export const errorResponse = (
return res.status(status).json({ errors: errors });
};

export const notifyClientOfCancelledJob = async (jobConfig: GradingJobConfig) => {
export const notifyClientOfCancelledJob = (jobConfig: GradingJobConfig) => {
const result: GradingJobResult = {
shell_responses: [],
errors: ["Job cancelled by a course professor or Orca admin."]
};
await fetch(jobConfig.response_url, {
console.info(jobConfig.response_url);
fetch(jobConfig.response_url, {
method: "POST",
headers: {
"Accept": "application/json",
"Content-Type": "application/json"
},
body: JSON.stringify({ result, key: jobConfig.key })
body: JSON.stringify({ ...result, key: jobConfig.key })
}).catch((err) =>
console.error(
`Encountered the following error while attempting to notify client of Job cancellation: ${err}`
Expand Down
4 changes: 0 additions & 4 deletions orchestrator/packages/api/src/utils/grader-images.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { getConfig, GradingJobConfig } from "@codegrade-orca/common";
import { execFile } from "child_process";
import { existsSync } from "fs";
import path = require("path");

const CONFIG = getConfig();
Expand All @@ -22,6 +21,3 @@ export const touchGraderImageFile = ({
);
});
};

export const graderImageExists = (graderImageSHA: string) =>
existsSync(path.join(CONFIG.dockerImageFolder, `${graderImageSHA}.tgz`));
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export interface GraderImageBuildResult {
was_successful: boolean,
logs: Array<ImageBuildLog>
logs: Array<ImageBuildLog | string>
}

export type ImageBuildStep = "Write request contents to Dockerfile." |
Expand Down
8 changes: 8 additions & 0 deletions orchestrator/packages/common/src/utils/grader-image-exists.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import path from "path";
import { getConfig } from "../config";
import { existsSync } from "fs";

/**
 * Reports whether the archive `<graderImageSHA>.tgz` is present in the
 * Docker image folder named by the current configuration.
 *
 * The configuration is read on every call rather than cached at module load.
 *
 * @param graderImageSHA SHA used as the base name of the `.tgz` archive.
 * @returns `true` when the archive file exists on disk, `false` otherwise.
 */
const graderImageExists = (graderImageSHA: string) => {
  const { dockerImageFolder } = getConfig();
  const archivePath = path.join(dockerImageFolder, `${graderImageSHA}.tgz`);
  return existsSync(archivePath);
};

export default graderImageExists;
3 changes: 3 additions & 0 deletions orchestrator/packages/common/src/utils/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
import graderImageExists from './grader-image-exists';

export * from './push-status-update';
export { graderImageExists };
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import { graderImageExists } from "@codegrade-orca/common";
import { GraderImageBuildRequest } from "@codegrade-orca/common";
import prismaInstance from "../prisma-instance"
import handleCompletedImageBuild, { EnqueuedJobInfo } from "./handle-completed-image-build"

/**
 * Reconciles every image build record still flagged `inProgress`.
 *
 * For each such record:
 *  - if the grader image file for its SHA exists, the original build request
 *    is rebuilt from the stored record and `handleCompletedImageBuild` is
 *    called with `true`; the request and the returned job info form a pair;
 *  - otherwise the record's `inProgress` flag is set back to `false` and
 *    nothing is reported for it.
 *
 * @returns One `[request, jobInfo]` pair per record whose image file exists;
 *          an empty array when no record is flagged `inProgress`.
 */
const cleanStaleBuildInfo = async (): Promise<Array<[GraderImageBuildRequest, EnqueuedJobInfo[]]>> =>
  prismaInstance.$transaction(async (tx) => {
    const inProgressBuilds = await tx.imageBuildInfo.findMany({ where: { inProgress: true } });
    if (inProgressBuilds.length === 0) {
      return [];
    }

    // Records are processed concurrently; an empty array marks "nothing to report".
    const outcomes = await Promise.all(
      inProgressBuilds.map(async (buildInfo) => {
        const { dockerfileSHA } = buildInfo;

        if (!graderImageExists(dockerfileSHA)) {
          // No image file on disk: clear the flag.
          await tx.imageBuildInfo.update({ where: { dockerfileSHA }, data: { inProgress: false } });
          return [];
        }

        const originalReq: GraderImageBuildRequest = {
          dockerfile_sha_sum: dockerfileSHA,
          dockerfile_contents: buildInfo.dockerfileContent,
          response_url: buildInfo.responseURL
        };
        // NOTE(review): `tx` is not handed to handleCompletedImageBuild, so it
        // presumably runs outside this transaction — confirm that is intended.
        const enqueuedJobs = await handleCompletedImageBuild(dockerfileSHA, true) as EnqueuedJobInfo[];
        return [originalReq, enqueuedJobs];
      })
    );

    // Drop the empty placeholders so only real pairs remain.
    return outcomes.filter((entry) => entry.length) as [GraderImageBuildRequest, EnqueuedJobInfo[]][];
  });

export default cleanStaleBuildInfo;
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import getNextImageBuild from "./get-next-image-build";
import handleCompletedImageBuild from "./handle-completed-image-build";
import handleCompletedImageBuild, { EnqueuedJobInfo, CancelJobInfo } from "./handle-completed-image-build";
import cleanStaleBuildInfo from "./clean-stale-build-info";

export { getNextImageBuild };
export { getNextImageBuild, EnqueuedJobInfo, CancelJobInfo };
export { handleCompletedImageBuild };
export { cleanStaleBuildInfo };
export * from "./image-build-status";
48 changes: 37 additions & 11 deletions orchestrator/packages/image-build-service/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,29 @@ import {
GraderImageBuildRequest,
toMilliseconds,
isImageBuildResult,
pushStatusUpdate
pushStatusUpdate,
getConfig,
GraderImageBuildResult
} from "@codegrade-orca/common";
import { getNextImageBuild, handleCompletedImageBuild } from "@codegrade-orca/db";
import { createAndStoreGraderImage, removeStaleImageFiles } from "./process-request";
import { cleanUpDockerFiles, sendJobResultForBuildFail, removeImageFromDockerIfExists, notifyClientOfBuildResult } from "./utils";
import { EnqueuedJobInfo } from "@codegrade-orca/db/dist/image-builder-operations/handle-completed-image-build";
import { EnqueuedJobInfo, cleanStaleBuildInfo } from "@codegrade-orca/db";
import path from "path";
import { existsSync, rmSync } from "fs";

const LOOP_SLEEP_TIME = 5; // Seconds

const main = async () => {
console.info("Cleaning up stale build info...");
const shaSumJobInfoPairs = await cleanStaleBuildInfo();
shaSumJobInfoPairs.forEach(([originalReq, enqueuedJobs]) => {
removeDockerfileIfExists(originalReq.dockerfile_sha_sum);
notifyClientOfBuildResult(cleanedImageResult(), originalReq);
enqueuedJobs.forEach(
({ response_url, key, ...status }) => pushStatusUpdate(status, response_url, key)
);
});
console.info("Build service initialized.");
while (true) {
let infoAsBuildReq: GraderImageBuildRequest | undefined = undefined;
Expand All @@ -24,28 +37,26 @@ const main = async () => {
}

console.info(`Attempting to build image with SHA ${nextBuildReq.dockerfileSHA}.`);

infoAsBuildReq = {
dockerfile_sha_sum: nextBuildReq.dockerfileSHA,
dockerfile_contents: nextBuildReq.dockerfileContent,
response_url: nextBuildReq.responseURL,
};

const result = await createAndStoreGraderImage(infoAsBuildReq);
// When success is passed as true, we get EnqueuedJobInfo[].
const jobInfo = await handleCompletedImageBuild(nextBuildReq.dockerfileSHA, true) as EnqueuedJobInfo[];
await notifyClientOfBuildResult(result, infoAsBuildReq);
await Promise.all(jobInfo.map(({ key, response_url, ...status }) => pushStatusUpdate(status, response_url, key)));

notifyClientOfBuildResult(result, infoAsBuildReq);
jobInfo.forEach(({ key, response_url, ...status }) => pushStatusUpdate(status, response_url, key));
console.info(`Successfully built image with SHA ${nextBuildReq.dockerfileSHA}.`);
} catch (err) {
if (isImageBuildResult(err) && infoAsBuildReq) {
const cancelledJobInfoList = await handleCompletedImageBuild(infoAsBuildReq.dockerfile_sha_sum, false);
if (cancelledJobInfoList !== null) {
await Promise.all(cancelledJobInfoList.map((cancelInfo) => {
sendJobResultForBuildFail(
cancelInfo,
).catch((notifyError) => console.error(notifyError)); // At this point we can't really do anything, but we should at least log out what happened.
}));
cancelledJobInfoList.forEach((cancelInfo) => sendJobResultForBuildFail(cancelInfo));
}
await notifyClientOfBuildResult(err, infoAsBuildReq).catch((notifyError) => console.error(notifyError));
notifyClientOfBuildResult(err, infoAsBuildReq);
await cleanUpDockerFiles(infoAsBuildReq.dockerfile_sha_sum);
}
console.error(err);
Expand All @@ -58,6 +69,21 @@ const main = async () => {
}
};

/**
 * Builds the result reported for an image that was found already built
 * during stale-build cleanup.
 *
 * @returns A successful build result whose only log entry is a plain-text
 *          notice explaining that the system crashed after the build and
 *          that leftover files were cleaned up.
 */
const cleanedImageResult = (): GraderImageBuildResult => {
  const recoveryNotice =
    "This image successfully built but then the system crashed; we have cleaned up extra files and the image can now be used without issue.";
  return { was_successful: true, logs: [recoveryNotice] };
};

/**
 * Deletes the Dockerfile stored for the given SHA sum from the configured
 * Docker image folder. Does nothing when no such file is present.
 *
 * @param dockerfileSHASum SHA sum used as the base name of the Dockerfile.
 */
const removeDockerfileIfExists = (dockerfileSHASum: string) => {
  const { dockerImageFolder } = getConfig();
  // Fix: the template literal previously ended in `.Dockerfile}` (stray brace),
  // so the computed path carried a trailing `}` and never matched
  // `<sha>.Dockerfile`, leaving the stale Dockerfile in place.
  // NOTE(review): assumes the build step writes `<sha>.Dockerfile` — confirm
  // against the code that creates the file.
  const dockerfilePath = path.join(dockerImageFolder, `${dockerfileSHASum}.Dockerfile`);
  if (!existsSync(dockerfilePath)) {
    return;
  }
  rmSync(dockerfilePath);
};

const sleep = (seconds: number): Promise<void> => {
return new Promise((resolve) => {
Expand Down
12 changes: 6 additions & 6 deletions orchestrator/packages/image-build-service/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,29 +47,29 @@ const imageExistsInDocker = (dockerfileSHASum: string): Promise<boolean> => {
});
};

export const sendJobResultForBuildFail = async (cancelInfo: CancelJobInfo) => {
export const sendJobResultForBuildFail = (cancelInfo: CancelJobInfo) => {
const result: GradingJobResult = {
shell_responses: [],
errors: ["The grader image for this job failed to build. Please contact a Professor or Admin."]
};
await fetch(cancelInfo.response_url, {
fetch(cancelInfo.response_url, {
method: "POST",
headers: {
"Accept": "application/json",
"Content-Type": "application/json"
},
body: JSON.stringify({ ...result, key: cancelInfo.key })
});
}).catch((err) => console.error(err));
}

export const notifyClientOfBuildResult = async (result: GraderImageBuildResult, originalReq: GraderImageBuildRequest) => {
export const notifyClientOfBuildResult = (result: GraderImageBuildResult, originalReq: GraderImageBuildRequest) => {
const { response_url } = originalReq;
await fetch(response_url, {
fetch(response_url, {
method: "POST",
headers: {
"Accept": "application/json",
"Content-Type": "application/json"
},
body: JSON.stringify(result)
});
}).catch((err) => console.error(err));
}
Loading