Skip to content

Commit

Permalink
fix: add logging context to runner lambda (github-aws-runners#1399)
Browse files Browse the repository at this point in the history
* fix(logging): Add context to scale logs

Signed-off-by: Nathaniel McAuliffe <[email protected]>

* Remove testing

Signed-off-by: Nathaniel McAuliffe <[email protected]>

* Remove unnecessary import

Signed-off-by: Nathaniel McAuliffe <[email protected]>

* Moving log fields to end, adjusting format
  • Loading branch information
mcaulifn authored Dec 1, 2021
1 parent 8094576 commit 0ba0930
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 40 deletions.
6 changes: 3 additions & 3 deletions modules/runners/lambdas/runners/src/scale-runners/gh-auth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import {
} from '@octokit/auth-app/dist-types/types';
import { OctokitOptions } from '@octokit/core/dist-types/types';
import { getParameterValue } from './ssm';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'gh-auth' });

export async function createOctoClient(token: string, ghesApiUrl = ''): Promise<Octokit> {
const ocktokitOptions: OctokitOptions = {
Expand Down Expand Up @@ -60,7 +60,7 @@ async function createAuth(installationId: number | undefined, ghesApiUrl: string
};
if (installationId) authOptions = { ...authOptions, installationId };

logger.debug(`GHES API URL: ${ghesApiUrl}`);
logger.debug(`GHES API URL: ${ghesApiUrl}`, LogFields.print());
if (ghesApiUrl) {
authOptions.request = request.defaults({
baseUrl: ghesApiUrl,
Expand Down
9 changes: 8 additions & 1 deletion modules/runners/lambdas/runners/src/scale-runners/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ export const logger = new Logger({
displayInstanceName: false,
maskAnyRegEx: ['--token [A-Z0-9]*'],
minLevel: process.env.LOG_LEVEL || 'info',
name: 'scale-up',
name: 'scale-runners',
overwriteConsole: true,
type: process.env.LOG_TYPE || 'pretty',
});
export class LogFields {
static fields: { [key: string]: string } = {};

public static print(): string {
return JSON.stringify(LogFields.fields);
}
}
14 changes: 9 additions & 5 deletions modules/runners/lambdas/runners/src/scale-runners/runners.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { EC2, SSM } from 'aws-sdk';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'runners' });

export interface RunnerList {
instanceId: string;
Expand Down Expand Up @@ -75,16 +75,20 @@ export async function terminateRunner(instanceId: string): Promise<void> {
InstanceIds: [instanceId],
})
.promise();
logger.info(`Runner ${instanceId} has been terminated.`);
logger.info(`Runner ${instanceId} has been terminated.`, LogFields.print());
}

export async function createRunner(runnerParameters: RunnerInputParameters, launchTemplateName: string): Promise<void> {
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters));
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters), LogFields.print());
const ec2 = new EC2();
const runInstancesResponse = await ec2
.runInstances(getInstanceParams(launchTemplateName, runnerParameters))
.promise();
logger.info('Created instance(s): ', runInstancesResponse.Instances?.map((i) => i.InstanceId).join(','));
logger.info(
'Created instance(s): ',
runInstancesResponse.Instances?.map((i) => i.InstanceId).join(','),
LogFields.print(),
);
const ssm = new SSM();
runInstancesResponse.Instances?.forEach(async (i: EC2.Instance) => {
await ssm
Expand Down
53 changes: 34 additions & 19 deletions modules/runners/lambdas/runners/src/scale-runners/scale-down.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ import { listEC2Runners, RunnerInfo, RunnerList, terminateRunner } from './runne
import { getIdleRunnerCount, ScalingDownConfig } from './scale-down-config';
import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth';
import { githubCache, GhRunners } from './cache';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'scale-down' });

async function getOrCreateOctokit(runner: RunnerInfo): Promise<Octokit> {
const key = runner.owner;
const cachedOctokit = githubCache.clients.get(key);

if (cachedOctokit) {
logger.debug(`[createGitHubClientForRunner] Cache hit for ${key}`);
logger.debug(`[createGitHubClientForRunner] Cache hit for ${key}`, LogFields.print());
return cachedOctokit;
}

logger.debug(`[createGitHubClientForRunner] Cache miss for ${key}`);
logger.debug(`[createGitHubClientForRunner] Cache miss for ${key}`, LogFields.print());
const ghesBaseUrl = process.env.GHES_URL;
let ghesApiUrl = '';
if (ghesBaseUrl) {
Expand Down Expand Up @@ -50,11 +50,11 @@ async function listGitHubRunners(runner: RunnerInfo): Promise<GhRunners> {
const key = runner.owner as string;
const cachedRunners = githubCache.runners.get(key);
if (cachedRunners) {
logger.debug(`[listGithubRunners] Cache hit for ${key}`);
logger.debug(`[listGithubRunners] Cache hit for ${key}`, LogFields.print());
return cachedRunners;
}

logger.debug(`[listGithubRunners] Cache miss for ${key}`);
logger.debug(`[listGithubRunners] Cache miss for ${key}`, LogFields.print());
const client = await getOrCreateOctokit(runner);
const runners =
runner.type === 'Org'
Expand Down Expand Up @@ -102,12 +102,18 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise<

if (result.status == 204) {
await terminateRunner(ec2runner.instanceId);
logger.info(`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`);
logger.info(
`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`,
LogFields.print(),
);
} else {
logger.error(`Failed to de-register GitHub runner: ${result.status}`);
logger.error(`Failed to de-register GitHub runner: ${result.status}`, LogFields.print());
}
} catch (e) {
logger.info(`Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`);
logger.info(
`Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`,
LogFields.print(),
);
}
}

Expand All @@ -120,26 +126,29 @@ async function evaluateAndRemoveRunners(

for (const ownerTag of ownerTags) {
const ec2RunnersFiltered = ec2Runners.filter((runner) => runner.owner === ownerTag);
logger.debug(`Found: '${ec2RunnersFiltered.length}' active GitHub runners with owner tag: '${ownerTag}'`);
logger.debug(
`Found: '${ec2RunnersFiltered.length}' active GitHub runners with owner tag: '${ownerTag}'`,
LogFields.print(),
);
for (const ec2Runner of ec2RunnersFiltered) {
const ghRunners = await listGitHubRunners(ec2Runner);
const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId);
if (ghRunner) {
if (runnerMinimumTimeExceeded(ec2Runner)) {
if (idleCounter > 0) {
idleCounter--;
logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`);
logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`, LogFields.print());
} else {
logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`);
logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print());
await removeRunner(ec2Runner, ghRunner.id);
}
}
} else {
if (bootTimeExceeded(ec2Runner)) {
logger.info(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`);
logger.info(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`, LogFields.print());
terminateOrphan(ec2Runner.instanceId);
} else {
logger.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`);
logger.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`, LogFields.print());
}
}
}
Expand All @@ -150,7 +159,7 @@ async function terminateOrphan(instanceId: string): Promise<void> {
try {
await terminateRunner(instanceId);
} catch (e) {
logger.debug(`Orphan runner '${instanceId}' cannot be removed.`);
logger.debug(`Orphan runner '${instanceId}' cannot be removed.`, LogFields.print());
}
}

Expand Down Expand Up @@ -195,19 +204,25 @@ export async function scaleDown(): Promise<void> {
// list and sort runners, newest first. This ensure we keep the newest runners longer.
const ec2Runners = await listAndSortRunners(environment);
const activeEc2RunnersCount = ec2Runners.length;
logger.info(`Found: '${activeEc2RunnersCount}' active GitHub EC2 runner instances before clean-up.`);
logger.info(
`Found: '${activeEc2RunnersCount}' active GitHub EC2 runner instances before clean-up.`,
LogFields.print(),
);

if (activeEc2RunnersCount === 0) {
logger.debug(`No active runners found for environment: '${environment}'`);
logger.debug(`No active runners found for environment: '${environment}'`, LogFields.print());
return;
}
const legacyRunners = filterLegacyRunners(ec2Runners);
logger.debug(JSON.stringify(legacyRunners));
logger.debug(JSON.stringify(legacyRunners), LogFields.print());
const runners = filterRunners(ec2Runners);

await evaluateAndRemoveRunners(runners, scaleDownConfigs);
await evaluateAndRemoveRunners(legacyRunners, scaleDownConfigs);

const activeEc2RunnersCountAfter = (await listAndSortRunners(environment)).length;
logger.info(`Found: '${activeEc2RunnersCountAfter}' active GitHub EC2 runners instances after clean-up.`);
logger.info(
`Found: '${activeEc2RunnersCountAfter}' active GitHub EC2 runners instances after clean-up.`,
LogFields.print(),
);
}
31 changes: 19 additions & 12 deletions modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import { listEC2Runners, createRunner, RunnerInputParameters } from './runners';
import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth';
import yn from 'yn';
import { Octokit } from '@octokit/rest';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'scale-up' });

export interface ActionRequestMessage {
id: number;
Expand All @@ -23,7 +23,16 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
const environment = process.env.ENVIRONMENT;
const ghesBaseUrl = process.env.GHES_URL;

console.info(`Received ${payload.eventType} from ${payload.repositoryOwner}/${payload.repositoryName}`);
const runnerType = enableOrgLevel ? 'Org' : 'Repo';
const runnerOwner = enableOrgLevel ? payload.repositoryOwner : `${payload.repositoryOwner}/${payload.repositoryName}`;

LogFields.fields = {};
LogFields.fields.runnerType = runnerType;
LogFields.fields.runnerOwner = runnerOwner;
LogFields.fields.event = payload.eventType;
LogFields.fields.id = payload.id.toString();

logger.info(`Received event`, LogFields.print());

let ghesApiUrl = '';
if (ghesBaseUrl) {
Expand All @@ -50,8 +59,6 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage

const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl);
const githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl);
const runnerType = enableOrgLevel ? 'Org' : 'Repo';
const runnerOwner = enableOrgLevel ? payload.repositoryOwner : `${payload.repositoryOwner}/${payload.repositoryName}`;

const isQueued = await getJobStatus(githubInstallationClient, payload);
if (isQueued) {
Expand All @@ -60,10 +67,10 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
runnerType,
runnerOwner,
});
logger.info(`${runnerType} ${runnerOwner} has ${currentRunners.length}/${maximumRunners} runners`);
logger.info(`Current runners: ${currentRunners.length} of ${maximumRunners}`, LogFields.print());

if (currentRunners.length < maximumRunners) {
console.info(`Attempting to launch a new runner`);
logger.info(`Attempting to launch a new runner`, LogFields.print());
// create token
const registrationToken = enableOrgLevel
? await githubInstallationClient.actions.createRegistrationTokenForOrg({ org: payload.repositoryOwner })
Expand All @@ -87,7 +94,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
runnerType,
});
} else {
logger.info('No runner will be created, maximum number of runners reached.');
logger.info('No runner will be created, maximum number of runners reached.', LogFields.print());
}
}
}
Expand All @@ -112,7 +119,7 @@ async function getJobStatus(githubInstallationClient: Octokit, payload: ActionRe
throw Error(`Event ${payload.eventType} is not supported`);
}
if (!isQueued) {
logger.info(`Job ${payload.id} is not queued`);
logger.info(`Job not queued`, LogFields.print());
}
return isQueued;
}
Expand All @@ -121,14 +128,14 @@ export async function createRunnerLoop(runnerParameters: RunnerInputParameters):
const launchTemplateNames = process.env.LAUNCH_TEMPLATE_NAME?.split(',') as string[];
let launched = false;
for (let i = 0; i < launchTemplateNames.length; i++) {
logger.info(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]}.`);
logger.info(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]}.`, LogFields.print());
try {
await createRunner(runnerParameters, launchTemplateNames[i]);
launched = true;
break;
} catch (error) {
logger.debug(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]} FAILED.`);
logger.error(error);
logger.debug(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]} FAILED.`, LogFields.print());
logger.error(error, LogFields.print());
}
}
if (launched == false) {
Expand Down

0 comments on commit 0ba0930

Please sign in to comment.