Skip to content

Commit

Permalink
src: fail workflow if cleanup fails
Browse files (browse the repository at this point in the history)
  • Loading branch information
adityamaru committed Dec 18, 2024
1 parent 7227817 commit 7d2d195
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 47 deletions.
14 changes: 7 additions & 7 deletions dist/index.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/index.js.map

Large diffs are not rendered by default.

59 changes: 40 additions & 19 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export async function startBlacksmithBuilder(inputs: context.Inputs): Promise<{a
} catch (error) {
// If the builder setup fails for any reason, we check if we should fallback to a local build.
// If we should not fallback, we rethrow the error and fail the build.
await reporter.reportBuildPushActionFailure(error);
await reporter.reportBuildPushActionFailure(error, "starting blacksmith builder");

let errorMessage = `Error during Blacksmith builder setup: ${error.message}`;
if (error.message.includes('buildkitd')) {
Expand Down Expand Up @@ -137,14 +137,13 @@ actionsToolkit.run(
buildId: null as string | null,
exposeId: '' as string
};
await core.group(`Starting Blacksmith builder`, async () => {
builderInfo = await startBlacksmithBuilder(inputs);
});

let buildError: Error | undefined;
let buildDurationSeconds: string | undefined;
let ref: string | undefined;
try {
await core.group(`Starting Blacksmith builder`, async () => {
builderInfo = await startBlacksmithBuilder(inputs);
});
if (builderInfo.addr) {
await core.group(`Creating a builder instance`, async () => {
const name = `blacksmith-${Date.now().toString(36)}`;
Expand Down Expand Up @@ -315,22 +314,43 @@ actionsToolkit.run(
refs: ref ? [ref] : []
});
}
await shutdownBuildkitd();
core.info('Shutdown buildkitd');
for (let attempt = 1; attempt <= 10; attempt++) {
try {
await execAsync(`sudo umount ${mountPoint}`);
core.debug(`${mountPoint} has been unmounted`);
break;
} catch (error) {
if (attempt === 10) {
throw error;
try {
const {stdout} = await execAsync('pgrep -f buildkitd');
if (stdout.trim()) {
await shutdownBuildkitd();
core.info('Shutdown buildkitd');
}
} catch (error) {
// No buildkitd process found, nothing to shutdown
core.debug('No buildkitd process found running');
}
try {
const {stdout: mountOutput} = await execAsync(`mount | grep ${mountPoint}`);
if (mountOutput) {
for (let attempt = 1; attempt <= 3; attempt++) {
try {
await execAsync(`sudo umount ${mountPoint}`);
core.debug(`${mountPoint} has been unmounted`);
break;
} catch (error) {
if (attempt === 3) {
throw error;
}
core.warning(`Unmount failed, retrying (${attempt}/3)...`);
await new Promise(resolve => setTimeout(resolve, 100));
}
}
core.warning(`Unmount failed, retrying (${attempt}/10)...`);
await new Promise(resolve => setTimeout(resolve, 300));
core.info('Unmounted device');
}
} catch (error) {
// grep returns exit code 1 when no matches are found.
if (error.code === 1) {
core.debug('No dangling mounts found to clean up');
} else {
// Only warn for actual errors, not for the expected case where grep finds nothing.
core.warning(`Error during cleanup: ${error.message}`);
}
}
core.info('Unmounted device');

if (builderInfo.addr) {
if (!buildError) {
Expand All @@ -341,7 +361,8 @@ actionsToolkit.run(
}
} catch (error) {
core.warning(`Error during Blacksmith builder shutdown: ${error.message}`);
await reporter.reportBuildPushActionFailure(error);
await reporter.reportBuildPushActionFailure(error, "shutting down blacksmith builder");
core.setFailed(`Error during Blacksmith builder shutdown: ${error.message}`);
} finally {
if (buildError) {
try {
Expand Down
19 changes: 4 additions & 15 deletions src/reporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ export function createBlacksmithAgentClient() {
return createClient(StickyDiskService, transport);
}

export async function reportBuildPushActionFailure(error?: Error) {
export async function reportBuildPushActionFailure(error?: Error, event?: string) {
const requestOptions = {
stickydisk_key: process.env.GITHUB_REPO_NAME || '',
repo_name: process.env.GITHUB_REPO_NAME || '',
region: process.env.BLACKSMITH_REGION || 'eu-central',
arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64',
vm_id: process.env.VM_ID || '',
petname: process.env.PETNAME || '',
message: error?.message || ''
message: event ? `${event}: ${error?.message || ''}` : error?.message || ''
};

const client = createBlacksmithAPIClient();
Expand All @@ -66,7 +66,7 @@ export async function reportBuildCompleted(exportRes?: ExportRecordResponse, bla
}

try {
const agentClient = await createBlacksmithAgentClient();
const agentClient = createBlacksmithAgentClient();

await agentClient.commitStickyDisk({
exposeId: exposeId || '',
Expand Down Expand Up @@ -117,7 +117,7 @@ export async function reportBuildFailed(dockerBuildId: string | null, dockerBuil
}

try {
const blacksmithAgentClient = await createBlacksmithAgentClient();
const blacksmithAgentClient = createBlacksmithAgentClient();
await blacksmithAgentClient.commitStickyDisk({
exposeId: exposeId || '',
stickyDiskKey: process.env.GITHUB_REPO_NAME || '',
Expand Down Expand Up @@ -166,17 +166,6 @@ export async function reportBuild(dockerfilePath: string) {
}
}

/**
 * Sends a GET request to `url` through the supplied axios client.
 *
 * When `formData` is non-null it is passed as the request `data` and a
 * `Content-Type: multipart/form-data` header is layered on top of the
 * client's common default headers; when it is null neither is added.
 *
 * @param client - Axios instance used to issue the request.
 * @param url - Target URL.
 * @param formData - Optional form payload, or null for a plain GET.
 * @param options - Optional settings; `signal` is forwarded to the request.
 * @returns The response produced by `client.get`.
 */
export async function get(client: AxiosInstance, url: string, formData: FormData | null, options?: {signal?: AbortSignal}): Promise<AxiosResponse> {
  // Both fragments collapse to an empty object when no form data is supplied.
  const payload = formData ? {data: formData} : {};
  const multipartHeader = formData ? {'Content-Type': 'multipart/form-data'} : {};

  const response = await client.get(url, {
    ...payload,
    headers: {
      ...client.defaults.headers.common,
      ...multipartHeader
    },
    signal: options?.signal
  });
  return response;
}

export async function post(client: AxiosInstance, url: string, formData: FormData | null, options?: {signal?: AbortSignal}): Promise<AxiosResponse> {
return await client.post(url, formData, {
headers: {
Expand Down
47 changes: 42 additions & 5 deletions src/setup_builder.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as fs from 'fs';
import * as core from '@actions/core';
import {exec} from 'child_process';
import {exec, execSync} from 'child_process';
import {promisify} from 'util';
import * as TOML from '@iarna/toml';
import * as reporter from './reporter';
Expand Down Expand Up @@ -177,19 +177,56 @@ export async function startAndConfigureBuildkitd(parallelism: number, device: st

// Change permissions on the buildkitd socket to allow non-root access
const startTime = Date.now();
const timeout = 10000; // 10 seconds in milliseconds
const timeout = 30000; // 30 seconds in milliseconds

core.info('Before checking socket:');
core.info(execSync('ls -alR /run/buildkit || true').toString());


// Poll until the buildkitd socket exists or the timeout elapses.
while (Date.now() - startTime < timeout) {
if (fs.existsSync('/run/buildkit/buildkitd.sock')) {
// Change permissions on the buildkitd socket to allow non-root access
await execAsync(`sudo chmod 666 /run/buildkit/buildkitd.sock`);
break;
}
await new Promise(resolve => setTimeout(resolve, 100)); // Poll every 100ms
await new Promise(resolve => setTimeout(resolve, 1000)); // Poll every 1s
}

if (!fs.existsSync('/run/buildkit/buildkitd.sock')) {
throw new Error('buildkitd socket not found after 10s timeout');
core.info('After checking socket:');
core.info(execSync('ls -alR /run/buildkit || true').toString());
throw new Error('buildkitd socket not found after 30s timeout');
}
// Check that buildkit instance is ready by querying workers for up to 30s
const startTimeBuildkitReady = Date.now();
const timeoutBuildkitReady = 30000; // 30 seconds

while (Date.now() - startTimeBuildkitReady < timeoutBuildkitReady) {
try {
const {stdout} = await execAsync('sudo buildctl debug workers');
const lines = stdout.trim().split('\n');
if (lines.length > 1) { // Check if we have output lines beyond the header
core.info('buildkit workers are ready');
core.info(stdout);
break;
}
core.info('buildkit workers not ready yet, retrying...');
} catch (error) {
core.info(`Error checking buildkit workers: ${error.message}`);
}
await new Promise(resolve => setTimeout(resolve, 1000));
}

// Final check after timeout
try {
const {stdout} = await execAsync('sudo buildctl debug workers');
const lines = stdout.trim().split('\n');
if (lines.length <= 1) {
throw new Error('buildkit workers not ready after 30s timeout');
}
} catch (error) {
core.warning(`Error checking buildkit workers: ${error.message}`);
throw error;
}
return buildkitdAddr;
}
Expand All @@ -199,7 +236,7 @@ export async function startAndConfigureBuildkitd(parallelism: number, device: st
export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> {
try {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 10000);
const timeoutId = setTimeout(() => controller.abort(), 30000);

let buildResponse: {docker_build_id: string} | null = null;
let exposeId: string = '';
Expand Down

0 comments on commit 7d2d195

Please sign in to comment.