github-aws-runners · npalm · Aug 1, 2024 · Jul 17, 2024 · Jul 17, 2024 · Jul 17, 2024
@@ -46,7 +46,7 @@ The "Scale Up Runner" Lambda actively monitors the SQS queue, processing incomin
 
 The Lambda first requests a JIT configuration or registration token from GitHub, which is needed later by the runner to register itself. This avoids the case that the EC2 instance, which later in the process will install the agent, needs administration permissions to register the runner. Next, the EC2 spot instance is created via the launch template. The launch template defines the specifications of the required instance and contains a [`user_data`](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) script. This script will install the required software and configure it. The configuration for the runner is shared via EC2 tags and the parameter store (SSM), from which the user data script will fetch it and delete it once it has been retrieved. Once the user data script is finished, the action runner should be online, and the workflow will start in seconds.
 
-The current method for scaling down runners employs a straightforward approach: at predefined intervals, the Lambda conducts a thorough examination of each runner (instance) to assess its activity. If a runner is found to be idle, it is deregistered from GitHub, and the associated AWS instance is terminated. For ephemeral runners the the instance is terminated immediately after the workflow is finished. To avoid orphaned runners the scale down lambda is active in this cae as well.
+The current method for scaling down runners employs a straightforward approach: at predefined intervals, the Lambda conducts a thorough examination of each runner (instance) to assess its activity. If a runner is found to be idle, it is deregistered from GitHub, and the associated AWS instance is terminated. For ephemeral runners, the instance is terminated immediately after the workflow is finished. Instances not registered in GitHub as a runner after a minimal boot time will be marked as orphaned and removed in the next cycle. To avoid orphaned runners, the scale-down Lambda is active in this case as well.
 
 ### Pool
 
@@ -68,7 +68,7 @@ The AMI cleaner is a lambda that will clean up AMIs that are older than a config
 
 > This feature is in beta; changes will not trigger a major release as long as it remains in beta.
 
-The Instance Termination Watcher is creating log and optional metrics for termination of instances. Currently only spot termination warnings are watched. See [configuration](configuration/) for more details. 
+The Instance Termination Watcher is creating log and optional metrics for termination of instances. Currently only spot termination warnings are watched. See [configuration](configuration/) for more details.
 
 ### Security
 

@@ -6,10 +6,10 @@ const config: Config = {
   ...defaultConfig,
   coverageThreshold: {
     global: {
-      statements: 97.79,
-      branches: 96.13,
-      functions: 95.4,
-      lines: 98.06,
+      statements: 98.01,
+      branches: 97.28,
+      functions: 95.6,
+      lines: 97.94,
     },
   },
 };

@@ -8,7 +8,7 @@
     "test": "NODE_ENV=test nx test",
     "test:watch": "NODE_ENV=test nx test --watch",
     "lint": "yarn eslint src",
-    "watch": "ts-node-dev --respawn --exit-child --files src/local.ts",
+    "watch": "ts-node-dev --respawn --exit-child --files src/local-down.ts",
     "build": "ncc build src/lambda.ts -o dist",
     "dist": "yarn build && cd dist && zip ../runners.zip index.js",
     "format": "prettier --write \"**/*.ts\"",

@@ -9,6 +9,7 @@ export interface RunnerList {
   type?: string;
   repo?: string;
   org?: string;
+  orphan?: boolean;
 }
 
 export interface RunnerInfo {
@@ -22,6 +23,7 @@ export interface ListRunnerFilters {
   runnerType?: RunnerType;
   runnerOwner?: string;
   environment?: string;
+  orphan?: boolean;
   statuses?: string[];
 }
 

@@ -3,6 +3,7 @@ import {
   CreateFleetCommandInput,
   CreateFleetInstance,
   CreateFleetResult,
+  CreateTagsCommand,
   DefaultTargetCapacityType,
   DescribeInstancesCommand,
   DescribeInstancesResult,
@@ -16,7 +17,7 @@ import { mockClient } from 'aws-sdk-client-mock';
 import 'aws-sdk-client-mock-jest';
 
 import ScaleError from './../scale-runners/ScaleError';
-import { createRunner, listEC2Runners, terminateRunner } from './runners';
+import { createRunner, listEC2Runners, tag, terminateRunner } from './runners';
 import { RunnerInfo, RunnerInputParameters, RunnerType } from './runners.d';
 
 process.env.AWS_REGION = 'eu-east-1';
@@ -67,6 +68,23 @@ describe('list instances', () => {
       launchTime: new Date('2020-10-10T14:48:00.000+09:00'),
       type: 'Org',
       owner: 'CoderToCat',
+      orphan: false,
+    });
+  });
+
+  it('check orphan tag.', async () => {
+    const instances: DescribeInstancesResult = mockRunningInstances;
+    instances.Reservations![0].Instances![0].Tags!.push({ Key: 'ghr:orphan', Value: 'true' });
+    mockEC2Client.on(DescribeInstancesCommand).resolves(instances);
+
+    const resp = await listEC2Runners();
+    expect(resp.length).toBe(1);
+    expect(resp).toContainEqual({
+      instanceId: instances.Reservations![0].Instances![0].InstanceId!,
+      launchTime: instances.Reservations![0].Instances![0].LaunchTime!,
+      type: 'Org',
+      owner: 'CoderToCat',
+      orphan: true,
     });
   });
 
@@ -114,6 +132,23 @@ describe('list instances', () => {
     });
   });
 
+  it('filters instances on environment and orphan', async () => {
+    mockRunningInstances.Reservations![0].Instances![0].Tags!.push({
+      Key: 'ghr:orphan',
+      Value: 'true',
+    });
+    mockEC2Client.on(DescribeInstancesCommand).resolves(mockRunningInstances);
+    await listEC2Runners({ environment: ENVIRONMENT, orphan: true });
+    expect(mockEC2Client).toHaveReceivedCommandWith(DescribeInstancesCommand, {
+      Filters: [
+        { Name: 'instance-state-name', Values: ['running', 'pending'] },
+        { Name: 'tag:ghr:environment', Values: [ENVIRONMENT] },
+        { Name: 'tag:ghr:orphan', Values: ['true'] },
+        { Name: 'tag:ghr:Application', Values: ['github-action-runner'] },
+      ],
+    });
+  });
+
   it('No instances, undefined reservations list.', async () => {
     const noInstances: DescribeInstancesResult = {
       Reservations: undefined,
@@ -182,6 +217,26 @@ describe('terminate runner', () => {
   });
 });
 
+describe('tag runner', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+  it('adding extra tag', async () => {
+    mockEC2Client.on(CreateTagsCommand).resolves({});
+    const runner: RunnerInfo = {
+      instanceId: 'instance-2',
+      owner: 'owner-2',
+      type: 'Repo',
+    };
+    await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'truer' }]);
+
+    expect(mockEC2Client).toHaveReceivedCommandWith(CreateTagsCommand, {
+      Resources: [runner.instanceId],
+      Tags: [{ Key: 'ghr:orphan', Value: 'truer' }],
+    });
+  });
+});
+
 describe('create runner', () => {
   const defaultRunnerConfig: RunnerConfig = {
     allocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED,

@@ -1,10 +1,12 @@
 import {
   CreateFleetCommand,
   CreateFleetResult,
+  CreateTagsCommand,
   DescribeInstancesCommand,
   DescribeInstancesResult,
   EC2Client,
   FleetLaunchTemplateOverridesRequest,
+  Tag,
   TerminateInstancesCommand,
   _InstanceType,
 } from '@aws-sdk/client-ec2';
@@ -46,6 +48,9 @@
       ec2FiltersBase.push({ Name: `tag:ghr:Type`, Values: [filters.runnerType] });
       ec2FiltersBase.push({ Name: `tag:ghr:Owner`, Values: [filters.runnerOwner] });
     }
+    if (filters.orphan) {
+      ec2FiltersBase.push({ Name: 'tag:ghr:orphan', Values: ['true'] });
+    }
   }
 
   for (const key of ['tag:ghr:Application']) {
@@ -85,6 +90,7 @@
             type: i.Tags?.find((e) => e.Key === 'ghr:Type')?.Value as string,
             repo: i.Tags?.find((e) => e.Key === 'ghr:Repo')?.Value as string,
             org: i.Tags?.find((e) => e.Key === 'ghr:Org')?.Value as string,
+            orphan: i.Tags?.find((e) => e.Key === 'ghr:orphan')?.Value === 'true',
           });
         }
       }
@@ -94,10 +100,16 @@
 }
 
 export async function terminateRunner(instanceId: string): Promise<void> {
-  logger.info(`Runner '${instanceId}' will be terminated.`);
+  logger.debug(`Runner '${instanceId}' will be terminated.`);
   const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
   await ec2.send(new TerminateInstancesCommand({ InstanceIds: [instanceId] }));
-  logger.info(`Runner ${instanceId} has been terminated.`);
+  logger.debug(`Runner ${instanceId} has been terminated.`);
+}
+
+export async function tag(instanceId: string, tags: Tag[]): Promise<void> {
+  logger.debug(`Tagging '${instanceId}'`, { tags });
+  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
+  await ec2.send(new CreateTagsCommand({ Resources: [instanceId], Tags: tags }));
 }
 
 function generateFleetOverrides(