Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: mark orphan runners before removing them #4001

Merged
merged 9 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions lambdas/functions/control-plane/jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ const config: Config = {
...defaultConfig,
coverageThreshold: {
global: {
statements: 97.79,
branches: 96.13,
functions: 95.4,
lines: 98.06,
statements: 98.01,
branches: 97.28,
functions: 95.6,
lines: 97.94,
},
},
};
Expand Down
2 changes: 2 additions & 0 deletions lambdas/functions/control-plane/src/aws/runners.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export interface RunnerList {
type?: string;
repo?: string;
org?: string;
orphan?: boolean;
}

export interface RunnerInfo {
Expand All @@ -22,6 +23,7 @@ export interface ListRunnerFilters {
runnerType?: RunnerType;
runnerOwner?: string;
environment?: string;
orphan?: boolean;
statuses?: string[];
}

Expand Down
57 changes: 56 additions & 1 deletion lambdas/functions/control-plane/src/aws/runners.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
CreateFleetCommandInput,
CreateFleetInstance,
CreateFleetResult,
CreateTagsCommand,
DefaultTargetCapacityType,
DescribeInstancesCommand,
DescribeInstancesResult,
Expand All @@ -16,7 +17,7 @@ import { mockClient } from 'aws-sdk-client-mock';
import 'aws-sdk-client-mock-jest';

import ScaleError from './../scale-runners/ScaleError';
import { createRunner, listEC2Runners, terminateRunner } from './runners';
import { createRunner, listEC2Runners, tag, terminateRunner } from './runners';
import { RunnerInfo, RunnerInputParameters, RunnerType } from './runners.d';

process.env.AWS_REGION = 'eu-east-1';
Expand Down Expand Up @@ -67,6 +68,23 @@ describe('list instances', () => {
launchTime: new Date('2020-10-10T14:48:00.000+09:00'),
type: 'Org',
owner: 'CoderToCat',
orphan: false,
});
});

it('check orphan tag.', async () => {
const instances: DescribeInstancesResult = mockRunningInstances;
instances.Reservations![0].Instances![0].Tags!.push({ Key: 'ghr:orphan', Value: 'true' });
mockEC2Client.on(DescribeInstancesCommand).resolves(instances);

const resp = await listEC2Runners();
expect(resp.length).toBe(1);
expect(resp).toContainEqual({
instanceId: instances.Reservations![0].Instances![0].InstanceId!,
launchTime: instances.Reservations![0].Instances![0].LaunchTime!,
type: 'Org',
owner: 'CoderToCat',
orphan: true,
});
});

Expand Down Expand Up @@ -114,6 +132,23 @@ describe('list instances', () => {
});
});

it('filters instances on environment and orphan', async () => {
mockRunningInstances.Reservations![0].Instances![0].Tags!.push({
Key: 'ghr:orphan',
Value: 'true',
});
mockEC2Client.on(DescribeInstancesCommand).resolves(mockRunningInstances);
await listEC2Runners({ environment: ENVIRONMENT, orphan: true });
expect(mockEC2Client).toHaveReceivedCommandWith(DescribeInstancesCommand, {
Filters: [
{ Name: 'instance-state-name', Values: ['running', 'pending'] },
{ Name: 'tag:ghr:environment', Values: [ENVIRONMENT] },
{ Name: 'tag:ghr:orphan', Values: ['true'] },
{ Name: 'tag:ghr:Application', Values: ['github-action-runner'] },
],
});
});

it('No instances, undefined reservations list.', async () => {
const noInstances: DescribeInstancesResult = {
Reservations: undefined,
Expand Down Expand Up @@ -182,6 +217,26 @@ describe('terminate runner', () => {
});
});

describe('tag runner', () => {
beforeEach(() => {
jest.clearAllMocks();
});
it('adding extra tag', async () => {
mockEC2Client.on(CreateTagsCommand).resolves({});
const runner: RunnerInfo = {
instanceId: 'instance-2',
owner: 'owner-2',
type: 'Repo',
};
await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'truer' }]);

expect(mockEC2Client).toHaveReceivedCommandWith(CreateTagsCommand, {
Resources: [runner.instanceId],
Tags: [{ Key: 'ghr:orphan', Value: 'truer' }],
});
});
});

describe('create runner', () => {
const defaultRunnerConfig: RunnerConfig = {
allocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED,
Expand Down
12 changes: 12 additions & 0 deletions lambdas/functions/control-plane/src/aws/runners.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import {

Check notice on line 1 in lambdas/functions/control-plane/src/aws/runners.ts

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ Getting better: Overall Code Complexity

The mean cyclomatic complexity decreases from 4.91 to 4.83, threshold = 4. This file has many conditional statements (e.g. if, for, while) across its implementation, leading to lower code health. Avoid adding more conditionals.
CreateFleetCommand,
CreateFleetResult,
CreateTagsCommand,
DescribeInstancesCommand,
DescribeInstancesResult,
EC2Client,
FleetLaunchTemplateOverridesRequest,
Tag,
TerminateInstancesCommand,
_InstanceType,
} from '@aws-sdk/client-ec2';
Expand Down Expand Up @@ -46,6 +48,9 @@
ec2FiltersBase.push({ Name: `tag:ghr:Type`, Values: [filters.runnerType] });
ec2FiltersBase.push({ Name: `tag:ghr:Owner`, Values: [filters.runnerOwner] });
}
if (filters.orphan) {
ec2FiltersBase.push({ Name: 'tag:ghr:orphan', Values: ['true'] });
npalm marked this conversation as resolved.
Show resolved Hide resolved
}

Check warning on line 53 in lambdas/functions/control-plane/src/aws/runners.ts

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ New issue: Complex Method

constructFilters has a cyclomatic complexity of 9, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.
}

for (const key of ['tag:ghr:Application']) {
Expand Down Expand Up @@ -85,6 +90,7 @@
type: i.Tags?.find((e) => e.Key === 'ghr:Type')?.Value as string,
repo: i.Tags?.find((e) => e.Key === 'ghr:Repo')?.Value as string,
org: i.Tags?.find((e) => e.Key === 'ghr:Org')?.Value as string,
orphan: i.Tags?.find((e) => e.Key === 'ghr:orphan')?.Value === 'true',

Check warning on line 93 in lambdas/functions/control-plane/src/aws/runners.ts

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

getRunnerInfo increases in cyclomatic complexity from 13 to 15, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.
npalm marked this conversation as resolved.
Show resolved Hide resolved
});
}
}
Expand All @@ -100,6 +106,12 @@
logger.info(`Runner ${instanceId} has been terminated.`);
}

export async function tag(instanceId: string, tags: Tag[]): Promise<void> {
logger.info(`Tagging '${instanceId}'`, { tags });
const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
await ec2.send(new CreateTagsCommand({ Resources: [instanceId], Tags: tags }));
}

function generateFleetOverrides(
subnetIds: string[],
instancesTypes: string[],
Expand Down
108 changes: 88 additions & 20 deletions lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import { RunnerInfo, RunnerList } from '../aws/runners.d';
import * as ghAuth from '../gh-auth/gh-auth';
import { listEC2Runners, terminateRunner } from './../aws/runners';
import { listEC2Runners, terminateRunner, tag } from './../aws/runners';
import { githubCache } from './cache';
import { newestFirstStrategy, oldestFirstStrategy, scaleDown } from './scale-down';

Expand Down Expand Up @@ -42,6 +42,7 @@
const mockedInstallationAuth = mocked(ghAuth.createGithubInstallationAuth, { shallow: false });
const mockCreateClient = mocked(ghAuth.createOctoClient, { shallow: false });
const mockListRunners = mocked(listEC2Runners);
const mockTagRunners = mocked(tag);
const mockTerminateRunners = mocked(terminateRunner);

export interface TestData {
Expand Down Expand Up @@ -172,218 +173,279 @@
describe.each(runnerTypes)('For %s runners.', (type) => {
it('Should not call terminate when no runners online.', async () => {
// setup
mockListRunners.mockResolvedValue([]);
mockAwsRunners([]);

// act
await scaleDown();

// assert
expect(listEC2Runners).toHaveBeenCalledWith({
environment: ENVIRONMENT,
});
expect(terminateRunner).not;
expect(mockOctokit.apps.getRepoInstallation).not;
expect(mockOctokit.apps.getRepoInstallation).not;
});

it(`Should terminate runner without idle config ${type} runners.`, async () => {
// setup
const runners = [
createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES - 1, true, false, false),
createRunnerTestData('idle-2', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 4, true, false, true),
createRunnerTestData('busy-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 3, true, false, false),
createRunnerTestData('booting-1', type, MINIMUM_BOOT_TIME - 1, false, false, false),
];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
// act
mockAwsRunners(runners);

await scaleDown();

// assert
expect(listEC2Runners).toHaveBeenCalledWith({
environment: ENVIRONMENT,
});

if (type === 'Repo') {
expect(mockOctokit.apps.getRepoInstallation).toHaveBeenCalled();
} else {
expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalled();
}

checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should respect idle runner with minimum running time not exceeded.`, async () => {
// setup
const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES - 1, true, false, false)];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should respect booting runner.`, async () => {
// setup
const runners = [createRunnerTestData('booting-1', type, MINIMUM_BOOT_TIME - 1, false, false, false)];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should respect busy runner.`, async () => {
// setup
const runners = [createRunnerTestData('busy-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, false)];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should not terminate a runner that became busy just before deregister runner.`, async () => {
// setup
const runners = [
createRunnerTestData(
'job-just-start-at-deregister-1',
type,
MINIMUM_TIME_RUNNING_IN_MINUTES + 1,
true,
false,
false,
),
];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);
mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
return { status: 500 };
});

mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
return { status: 500 };
});

// act and ensure no exception is thrown
await expect(scaleDown()).resolves.not.toThrow();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should terminate orphan.`, async () => {
// setup
const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, false, false);
const idleRunner = createRunnerTestData('idle-1', type, MINIMUM_BOOT_TIME + 1, true, false, false);
const runners = [orphanRunner, idleRunner];

mockGitHubRunners([]);
mockListRunners.mockResolvedValue(runners);
mockGitHubRunners([idleRunner]);
mockAwsRunners(runners);

// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);

expect(mockTagRunners).toHaveBeenCalledWith(orphanRunner.instanceId, [
{
Key: 'orphan',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Key: 'orphan',
Key: 'Orphan',

As above?

Value: 'true',
},
]);
expect(mockTagRunners).not.toHaveBeenCalledWith(idleRunner.instanceId, expect.anything());

// next cycle, update test data set orphan to true and terminate should be true
orphanRunner.orphan = true;
orphanRunner.shouldBeTerminated = true;

// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should orphan termination failure is not resulting in an exception..`, async () => {
it(`Should ignore errors when termination orphan fails.`, async () => {
// setup
const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true);
const runners = [orphanRunner];

mockGitHubRunners([]);
mockListRunners.mockResolvedValue(runners);
mockTerminateRunners.mockRejectedValue(new Error('Termination failed'));
mockAwsRunners(runners);
mockTerminateRunners.mockImplementation(() => {
throw new Error('Failed to terminate');
});

// act and ensure no exception is thrown
await expect(scaleDown()).resolves.not.toThrow();
// act
await scaleDown();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

describe('When orphan termination fails', () => {
it(`Should not throw in case of list runner exception.`, async () => {
// setup
const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];

mockGitHubRunners([]);
mockListRunners.mockRejectedValueOnce(new Error('Failed to list runners'));
mockAwsRunners(runners);

// ac
await scaleDown();

// assert
checkNonTerminated(runners);
});

it(`Should not throw in case of terminate runner exception.`, async () => {
// setup
const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];

mockGitHubRunners([]);
mockAwsRunners(runners);
mockTerminateRunners.mockRejectedValue(new Error('Failed to terminate'));

// act and ensure no exception is thrown
await scaleDown();

// assert
checkNonTerminated(runners);
});
});

it(`Should not terminate instance in case de-register fails.`, async () => {
// setup
const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, true, false)];
const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, false)];

mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
return { status: 500 };
});
mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
return { status: 500 };
});

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

// act and should resolve
await expect(scaleDown()).resolves.not.toThrow();

// assert
checkTerminated(runners);
checkNonTerminated(runners);
});

it(`Should not throw an exception in case of failure during removing a runner.`, async () => {
// setup
const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, true, false)];

mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
throw new Error('Failed to delete runner');
});
mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
throw new Error('Failed to delete runner');
});

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

// act
await expect(scaleDown()).resolves.not.toThrow();
});

const evictionStrategies = ['oldest_first', 'newest_first'];
describe.each(evictionStrategies)('When idle config defined', (evictionStrategy) => {
const defaultConfig = {
idleCount: 1,
cron: '* * * * * *',
timeZone: 'Europe/Amsterdam',
evictionStrategy,
};

beforeEach(() => {
process.env.SCALE_DOWN_CONFIG = JSON.stringify([defaultConfig]);
});

it(`Should terminate based on the the idle config with ${evictionStrategy} eviction strategy`, async () => {
// setup
const runnerToTerminateTime =
evictionStrategy === 'oldest_first'
? MINIMUM_TIME_RUNNING_IN_MINUTES + 5
: MINIMUM_TIME_RUNNING_IN_MINUTES + 1;
const runners = [
createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 4, true, false, false),
createRunnerTestData('idle-to-terminate', type, runnerToTerminateTime, true, false, true),
];

mockGitHubRunners(runners);
mockListRunners.mockResolvedValue(runners);
mockAwsRunners(runners);

Check warning on line 448 in lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Large Method

'for %s'.'For %s runners.' increases from 154 to 192 lines of code, threshold = 70. Large functions with many lines of code are generally harder to understand and lower the code health. Avoid adding more lines to this function.

// act
await scaleDown();
Expand Down Expand Up @@ -502,6 +564,12 @@
});
});

function mockAwsRunners(runners: RunnerTestItem[]) {
mockListRunners.mockImplementation(async (filter) => {
return runners.filter((r) => !filter?.orphan || filter?.orphan === r.orphan);
});
}

function checkNonTerminated(runners: RunnerTestItem[]) {
const notTerminated = runners.filter((r) => !r.shouldBeTerminated);
for (const toTerminate of notTerminated) {
Expand Down
Loading