github-aws-runners · npalm · Aug 1, 2024 · Jul 17, 2024 · Jul 17, 2024 · Jul 17, 2024
@@ -6,10 +6,10 @@ const config: Config = {
   ...defaultConfig,
   coverageThreshold: {
     global: {
-      statements: 97.79,
-      branches: 96.13,
-      functions: 95.4,
-      lines: 98.06,
+      statements: 98.01,
+      branches: 97.28,
+      functions: 95.6,
+      lines: 97.94,
     },
   },
 };

@@ -9,6 +9,7 @@ export interface RunnerList {
   type?: string;
   repo?: string;
   org?: string;
+  orphan?: boolean;
 }
 
 export interface RunnerInfo {
@@ -22,6 +23,7 @@ export interface ListRunnerFilters {
   runnerType?: RunnerType;
   runnerOwner?: string;
   environment?: string;
+  orphan?: boolean;
   statuses?: string[];
 }
 

@@ -3,6 +3,7 @@ import {
   CreateFleetCommandInput,
   CreateFleetInstance,
   CreateFleetResult,
+  CreateTagsCommand,
   DefaultTargetCapacityType,
   DescribeInstancesCommand,
   DescribeInstancesResult,
@@ -16,7 +17,7 @@ import { mockClient } from 'aws-sdk-client-mock';
 import 'aws-sdk-client-mock-jest';
 
 import ScaleError from './../scale-runners/ScaleError';
-import { createRunner, listEC2Runners, terminateRunner } from './runners';
+import { createRunner, listEC2Runners, tag, terminateRunner } from './runners';
 import { RunnerInfo, RunnerInputParameters, RunnerType } from './runners.d';
 
 process.env.AWS_REGION = 'eu-east-1';
@@ -67,6 +68,23 @@ describe('list instances', () => {
       launchTime: new Date('2020-10-10T14:48:00.000+09:00'),
       type: 'Org',
       owner: 'CoderToCat',
+      orphan: false,
+    });
+  });
+
+  it('check orphan tag.', async () => {
+    const instances: DescribeInstancesResult = mockRunningInstances;
+    instances.Reservations![0].Instances![0].Tags!.push({ Key: 'ghr:orphan', Value: 'true' });
+    mockEC2Client.on(DescribeInstancesCommand).resolves(instances);
+
+    const resp = await listEC2Runners();
+    expect(resp.length).toBe(1);
+    expect(resp).toContainEqual({
+      instanceId: instances.Reservations![0].Instances![0].InstanceId!,
+      launchTime: instances.Reservations![0].Instances![0].LaunchTime!,
+      type: 'Org',
+      owner: 'CoderToCat',
+      orphan: true,
     });
   });
 
@@ -114,6 +132,23 @@ describe('list instances', () => {
     });
   });
 
+  it('filters instances on environment and orphan', async () => {
+    mockRunningInstances.Reservations![0].Instances![0].Tags!.push({
+      Key: 'ghr:orphan',
+      Value: 'true',
+    });
+    mockEC2Client.on(DescribeInstancesCommand).resolves(mockRunningInstances);
+    await listEC2Runners({ environment: ENVIRONMENT, orphan: true });
+    expect(mockEC2Client).toHaveReceivedCommandWith(DescribeInstancesCommand, {
+      Filters: [
+        { Name: 'instance-state-name', Values: ['running', 'pending'] },
+        { Name: 'tag:ghr:environment', Values: [ENVIRONMENT] },
+        { Name: 'tag:ghr:orphan', Values: ['true'] },
+        { Name: 'tag:ghr:Application', Values: ['github-action-runner'] },
+      ],
+    });
+  });
+
   it('No instances, undefined reservations list.', async () => {
     const noInstances: DescribeInstancesResult = {
       Reservations: undefined,
@@ -182,6 +217,26 @@ describe('terminate runner', () => {
   });
 });
 
+describe('tag runner', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+  it('adding extra tag', async () => {
+    mockEC2Client.on(CreateTagsCommand).resolves({});
+    const runner: RunnerInfo = {
+      instanceId: 'instance-2',
+      owner: 'owner-2',
+      type: 'Repo',
+    };
+    await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'truer' }]);
+
+    expect(mockEC2Client).toHaveReceivedCommandWith(CreateTagsCommand, {
+      Resources: [runner.instanceId],
+      Tags: [{ Key: 'ghr:orphan', Value: 'truer' }],
+    });
+  });
+});
+
 describe('create runner', () => {
   const defaultRunnerConfig: RunnerConfig = {
     allocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED,

@@ -1,10 +1,12 @@
 import {
   CreateFleetCommand,
   CreateFleetResult,
+  CreateTagsCommand,
   DescribeInstancesCommand,
   DescribeInstancesResult,
   EC2Client,
   FleetLaunchTemplateOverridesRequest,
+  Tag,
   TerminateInstancesCommand,
   _InstanceType,
 } from '@aws-sdk/client-ec2';
@@ -46,6 +48,9 @@
       ec2FiltersBase.push({ Name: `tag:ghr:Type`, Values: [filters.runnerType] });
       ec2FiltersBase.push({ Name: `tag:ghr:Owner`, Values: [filters.runnerOwner] });
     }
+    if (filters.orphan) {
+      ec2FiltersBase.push({ Name: 'tag:ghr:orphan', Values: ['true'] });
+    }
   }
 
   for (const key of ['tag:ghr:Application']) {
@@ -85,6 +90,7 @@
             type: i.Tags?.find((e) => e.Key === 'ghr:Type')?.Value as string,
             repo: i.Tags?.find((e) => e.Key === 'ghr:Repo')?.Value as string,
             org: i.Tags?.find((e) => e.Key === 'ghr:Org')?.Value as string,
+            orphan: i.Tags?.find((e) => e.Key === 'ghr:orphan')?.Value === 'true',
           });
         }
       }
@@ -100,6 +106,12 @@
   logger.info(`Runner ${instanceId} has been terminated.`);
 }
 
+export async function tag(instanceId: string, tags: Tag[]): Promise<void> {
+  logger.info(`Tagging '${instanceId}'`, { tags });
+  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
+  await ec2.send(new CreateTagsCommand({ Resources: [instanceId], Tags: tags }));
+}
+
 function generateFleetOverrides(
   subnetIds: string[],
   instancesTypes: string[],

@@ -5,7 +5,7 @@
 
 import { RunnerInfo, RunnerList } from '../aws/runners.d';
 import * as ghAuth from '../gh-auth/gh-auth';
-import { listEC2Runners, terminateRunner } from './../aws/runners';
+import { listEC2Runners, terminateRunner, tag } from './../aws/runners';
 import { githubCache } from './cache';
 import { newestFirstStrategy, oldestFirstStrategy, scaleDown } from './scale-down';
 
@@ -42,6 +42,7 @@
 const mockedInstallationAuth = mocked(ghAuth.createGithubInstallationAuth, { shallow: false });
 const mockCreateClient = mocked(ghAuth.createOctoClient, { shallow: false });
 const mockListRunners = mocked(listEC2Runners);
+const mockTagRunners = mocked(tag);
 const mockTerminateRunners = mocked(terminateRunner);
 
 export interface TestData {
@@ -172,218 +173,279 @@
     describe.each(runnerTypes)('For %s runners.', (type) => {
       it('Should not call terminate when no runners online.', async () => {
         // setup
-        mockListRunners.mockResolvedValue([]);
+        mockAwsRunners([]);
 
         // act
         await scaleDown();

        // assert
        expect(listEC2Runners).toHaveBeenCalledWith({
          environment: ENVIRONMENT,
        });
        expect(terminateRunner).not;
        expect(mockOctokit.apps.getRepoInstallation).not;
        expect(mockOctokit.apps.getRepoInstallation).not;
      });

      it(`Should terminate runner without idle config ${type} runners.`, async () => {
        // setup
        const runners = [
          createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES - 1, true, false, false),
          createRunnerTestData('idle-2', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 4, true, false, true),
          createRunnerTestData('busy-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 3, true, false, false),
          createRunnerTestData('booting-1', type, MINIMUM_BOOT_TIME - 1, false, false, false),
        ];
 
         mockGitHubRunners(runners);
         mockListRunners.mockResolvedValue(runners);
-        // act
+        mockAwsRunners(runners);
+
         await scaleDown();
 
         // assert
        expect(listEC2Runners).toHaveBeenCalledWith({
          environment: ENVIRONMENT,
        });

        if (type === 'Repo') {
          expect(mockOctokit.apps.getRepoInstallation).toHaveBeenCalled();
        } else {
          expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalled();
        }

        checkTerminated(runners);
        checkNonTerminated(runners);
      });

      it(`Should respect idle runner with minimum running time not exceeded.`, async () => {
        // setup
         const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES - 1, true, false, false)];
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
 
         // act
         await scaleDown();

        // assert
        checkTerminated(runners);
        checkNonTerminated(runners);
      });

      it(`Should respect booting runner.`, async () => {
        // setup
         const runners = [createRunnerTestData('booting-1', type, MINIMUM_BOOT_TIME - 1, false, false, false)];
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
 
         // act
         await scaleDown();

        // assert
        checkTerminated(runners);
        checkNonTerminated(runners);
      });

      it(`Should respect busy runner.`, async () => {
        // setup
         const runners = [createRunnerTestData('busy-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, false)];
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
 
         // act
         await scaleDown();

        // assert
        checkTerminated(runners);
        checkNonTerminated(runners);
      });

      it(`Should not terminate a runner that became busy just before deregister runner.`, async () => {
        // setup
        const runners = [
          createRunnerTestData(
            'job-just-start-at-deregister-1',
            type,
            MINIMUM_TIME_RUNNING_IN_MINUTES + 1,
            true,
            false,
            false,
          ),
         ];
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
         mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
           return { status: 500 };
         });

        mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
          return { status: 500 };
        });

        // act and ensure no exception is thrown
        await expect(scaleDown()).resolves.not.toThrow();

        // assert
        checkTerminated(runners);
        checkNonTerminated(runners);
      });
 
       it(`Should terminate orphan.`, async () => {
         // setup
-        const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
+        const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, false, false);
+        const idleRunner = createRunnerTestData('idle-1', type, MINIMUM_BOOT_TIME + 1, true, false, false);
+        const runners = [orphanRunner, idleRunner];
 
-        mockGitHubRunners([]);
-        mockListRunners.mockResolvedValue(runners);
+        mockGitHubRunners([idleRunner]);
+        mockAwsRunners(runners);
+
+        // act
+        await scaleDown();
+
+        // assert
+        checkTerminated(runners);
+        checkNonTerminated(runners);
+
+        expect(mockTagRunners).toHaveBeenCalledWith(orphanRunner.instanceId, [
+          {
+            Key: 'orphan',
-            Key: 'orphan',
+            Key: 'Orphan',
-            Key: 'orphan',
+            Key: 'Orphan',
+            Value: 'true',
+          },
+        ]);
+        expect(mockTagRunners).not.toHaveBeenCalledWith(idleRunner.instanceId, expect.anything());
+
+        // next cycle, update test data set orphan to true and terminate should be true
+        orphanRunner.orphan = true;
+        orphanRunner.shouldBeTerminated = true;
 
         // act
         await scaleDown();

        // assert
        checkTerminated(runners);
         checkNonTerminated(runners);
       });
 
-      it(`Should orphan termination failure is not resulting in an exception..`, async () => {
+      it(`Should ignore errors when termination orphan fails.`, async () => {
         // setup
-        const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
+        const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true);
+        const runners = [orphanRunner];
 
         mockGitHubRunners([]);
-        mockListRunners.mockResolvedValue(runners);
-        mockTerminateRunners.mockRejectedValue(new Error('Termination failed'));
+        mockAwsRunners(runners);
+        mockTerminateRunners.mockImplementation(() => {
+          throw new Error('Failed to terminate');
+        });
 
-        // act and ensure no exception is thrown
-        await expect(scaleDown()).resolves.not.toThrow();
+        // act
+        await scaleDown();
+
+        // assert
+        checkTerminated(runners);
+        checkNonTerminated(runners);
+      });
+
+      describe('When orphan termination fails', () => {
+        it(`Should not throw in case of list runner exception.`, async () => {
+          // setup
+          const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
+
+          mockGitHubRunners([]);
+          mockListRunners.mockRejectedValueOnce(new Error('Failed to list runners'));
+          mockAwsRunners(runners);
+
+          // ac
+          await scaleDown();
+
+          // assert
+          checkNonTerminated(runners);
+        });
+
+        it(`Should not throw in case of terminate runner exception.`, async () => {
+          // setup
+          const runners = [createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true)];
+
+          mockGitHubRunners([]);
+          mockAwsRunners(runners);
+          mockTerminateRunners.mockRejectedValue(new Error('Failed to terminate'));
+
+          // act and ensure no exception is thrown
+          await scaleDown();
+
+          // assert
+          checkNonTerminated(runners);
+        });
       });
 
       it(`Should not terminate instance in case de-register fails.`, async () => {
         // setup
-        const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, true, false)];
+        const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, false)];
 
         mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
           return { status: 500 };
        });
        mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
          return { status: 500 };
         });
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
 
         // act and should resolve
         await expect(scaleDown()).resolves.not.toThrow();

        // assert
        checkTerminated(runners);
        checkNonTerminated(runners);
      });

      it(`Should not throw an exception in case of failure during removing a runner.`, async () => {
        // setup
        const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, true, false)];

        mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
          throw new Error('Failed to delete runner');
        });
        mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
          throw new Error('Failed to delete runner');
         });
 
         mockGitHubRunners(runners);
-        mockListRunners.mockResolvedValue(runners);
+        mockAwsRunners(runners);
 
         // act
         await expect(scaleDown()).resolves.not.toThrow();
      });

      const evictionStrategies = ['oldest_first', 'newest_first'];
      describe.each(evictionStrategies)('When idle config defined', (evictionStrategy) => {
        const defaultConfig = {
          idleCount: 1,
          cron: '* * * * * *',
          timeZone: 'Europe/Amsterdam',
          evictionStrategy,
        };

        beforeEach(() => {
          process.env.SCALE_DOWN_CONFIG = JSON.stringify([defaultConfig]);
        });

        it(`Should terminate based on the the idle config with ${evictionStrategy} eviction strategy`, async () => {
          // setup
          const runnerToTerminateTime =
            evictionStrategy === 'oldest_first'
              ? MINIMUM_TIME_RUNNING_IN_MINUTES + 5
              : MINIMUM_TIME_RUNNING_IN_MINUTES + 1;
          const runners = [
            createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 4, true, false, false),
            createRunnerTestData('idle-to-terminate', type, runnerToTerminateTime, true, false, true),
           ];
 
           mockGitHubRunners(runners);
-          mockListRunners.mockResolvedValue(runners);
+          mockAwsRunners(runners);
 
           // act
           await scaleDown();
@@ -502,6 +564,12 @@
   });
 });
 
+function mockAwsRunners(runners: RunnerTestItem[]) {
+  mockListRunners.mockImplementation(async (filter) => {
+    return runners.filter((r) => !filter?.orphan || filter?.orphan === r.orphan);
+  });
+}
+
 function checkNonTerminated(runners: RunnerTestItem[]) {
   const notTerminated = runners.filter((r) => !r.shouldBeTerminated);
   for (const toTerminate of notTerminated) {