Skip to content

Commit

Permalink
Enable non-org autoscaler to read scale-config from any repo (#5767)
Browse files Browse the repository at this point in the history
Enable putting scale-config in test-infra, even for non-organization
runners. Before, only scale-configs for organization-scoped runners
could be put in arbitrary repos

Significant change:
- `scale_config_repo` no longer defaults to test-infra. You now _have_
to specify a value if you're using org runners. Non-org runners will
default to the job's repo. pytorch-gha-infra and ci-infra will need to
be updated accordingly.

Testing: Deployed these changes to LF canary, terminated all lf-c
instances, and verified that creating a new pull request there would
still result in instances getting provisioned
  • Loading branch information
ZainRizvi authored Oct 18, 2024
1 parent 62aa6b8 commit dc78e2a
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,18 @@ describe('Config', () => {
expect(Config.Instance.mustHaveIssuesLabels).toEqual([]);
expect(Config.Instance.runnerGroupName).toBeUndefined();
expect(Config.Instance.runnersExtraLabels).toBeUndefined();
expect(Config.Instance.scaleConfigRepo).toEqual('test-infra');
expect(Config.Instance.scaleConfigRepo).toEqual('');
expect(Config.Instance.scaleConfigRepoPath).toEqual('.github/scale-config.yml');
expect(Config.Instance.secretsManagerSecretsId).toBeUndefined();
expect(Config.Instance.shuffledAwsRegionInstances).toEqual([]);
expect(Config.Instance.enableOrganizationRunners).toBeFalsy();
});

it('requires scaleConfigRepo to be set when organization runners are enabled', () => {
Config.resetConfig();
process.env.ENABLE_ORGANIZATION_RUNNERS = 'True';
process.env.SCALE_CONFIG_REPO = '';

expect(() => Config.Instance).toThrowError('SCALE_CONFIG_REPO is required when ENABLE_ORGANIZATION_RUNNERS is set');
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ export class Config {
this.runnerGroupName = process.env.RUNNER_GROUP_NAME;
this.runnersExtraLabels = process.env.RUNNER_EXTRA_LABELS;
/* istanbul ignore next */
this.scaleConfigRepo = process.env.SCALE_CONFIG_REPO || 'test-infra';
this.scaleConfigRepo = process.env.SCALE_CONFIG_REPO || '';
if (this.enableOrganizationRunners && !this.scaleConfigRepo) {
throw new Error('SCALE_CONFIG_REPO is required when ENABLE_ORGANIZATION_RUNNERS is set');
}
this.scaleConfigRepoPath = process.env.SCALE_CONFIG_REPO_PATH || '.github/scale-config.yml';
this.secretsManagerSecretsId = process.env.SECRETSMANAGER_SECRETS_ID;
this.sSMParamCleanupAgeDays = Number(process.env.SSM_PARAM_CLEANUP_AGE_DAYS || '7');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1257,57 +1257,119 @@ describe('scale-down', () => {
describe('repo runners', () => {
const runnerType = 'runnerTypeDef';
const owner = 'the-org';
const repo: Repo = {
const runnerRepo: Repo = {
owner: owner,
repo: 'a-repo',
};
const repoKey = `${owner}/a-repo`;
const runnerRepoKey = `${owner}/a-repo`;

beforeEach(() => {
jest.spyOn(Config, 'Instance', 'get').mockImplementation(
() =>
({
...baseConfig,
enableOrganizationRunners: false,
} as unknown as Config),
);
});
describe('When no scaleConfigRepo is set, the runner repo is used as the source for scale config', () => {
beforeEach(() => {
jest.spyOn(Config, 'Instance', 'get').mockImplementation(
() =>
({
...baseConfig,
enableOrganizationRunners: false,
} as unknown as Config),
);
});

it('is_ephemeral === undefined', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);
it('is_ephemeral === undefined', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]]));
mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]]));

expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual(
false,
);
expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(false);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics);
});
expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics);
});

it('is_ephemeral === true', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);
it('is_ephemeral === true', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]]));
mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]]));

expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual(true);
expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(true);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics);
expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics);
});

it('is_ephemeral === false', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]]));

expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(false);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics);
});
});

it('is_ephemeral === false', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);
describe("When a scaleConfigRepo is set, it's used as the source for scale config", () => {
const centralRepoName = 'central-repo'; // to be the test-infra equivalent
const centralRepo: Repo = {
owner: owner,
repo: centralRepoName,
};

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]]));
beforeEach(() => {
jest.spyOn(Config, 'Instance', 'get').mockImplementation(
() =>
({
...baseConfig,
enableOrganizationRunners: false,
scaleConfigRepo: centralRepoName,
} as unknown as Config),
);
});

expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual(
false,
);
it('is_ephemeral === undefined', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics);
mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]]));

expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(false);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics);
});

it('is_ephemeral === true', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]]));

expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(true);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics);
});

it('is_ephemeral === false', async () => {
const mockedGetRunnerTypes = mocked(getRunnerTypes);

mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]]));

expect(
await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics),
).toEqual(false);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics);
});
});
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ export async function isEphemeralRunner(ec2runner: RunnerInfo, metrics: ScaleDow
}

const repo: Repo = (() => {
if (Config.Instance.enableOrganizationRunners) {
if (Config.Instance.scaleConfigRepo) {
return {
owner: ec2runner.org !== undefined ? (ec2runner.org as string) : getRepo(ec2runner.repo as string).owner,
repo: Config.Instance.scaleConfigRepo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,47 @@ describe('scaleUp', () => {
expect(mockedListGithubRunners).not.toBeCalled();
});

it('uses the scaleConfigRepo when provided', async () => {
jest.spyOn(Config, 'Instance', 'get').mockImplementation(
() =>
({
...baseCfg,
enableOrganizationRunners: false,
scaleConfigRepo: 'scale-config-repo',
} as unknown as Config),
);
const payload = {
id: 10,
eventType: 'event',
repositoryName: 'repo',
repositoryOwner: 'owner',
installationId: 2,
runnerLabels: ['label1', 'label2'],
};
const mockedGetRunnerTypes = mocked(getRunnerTypes).mockResolvedValue(
new Map([
[
'label1-nomatch',
{
instance_type: 'instance_type',
os: 'os',
max_available: 33,
disk_size: 113,
runnerTypeName: 'runnerTypeName',
is_ephemeral: false,
},
],
]),
);
const mockedListGithubRunners = mocked(listGithubRunnersRepo);

await scaleUp('aws:sqs', payload, metrics);

expect(mockedGetRunnerTypes).toBeCalledTimes(1);
expect(mockedGetRunnerTypes).toBeCalledWith({ repo: 'scale-config-repo', owner: 'owner' }, metrics);
expect(mockedListGithubRunners).not.toBeCalled();
});

it('have available runners', async () => {
jest.spyOn(Config, 'Instance', 'get').mockImplementation(
() =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export async function scaleUp(
const runnerTypes = await getRunnerTypes(
{
owner: repo.owner,
repo: Config.Instance.enableOrganizationRunners ? Config.Instance.scaleConfigRepo : repo.repo,
repo: Config.Instance.scaleConfigRepo || repo.repo,
},
metrics,
);
Expand Down
10 changes: 10 additions & 0 deletions terraform-aws-github-runner/modules/runners/scale-down.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ resource "aws_lambda_function" "scale_down" {
tags = local.tags
memory_size = 2048

lifecycle {
precondition {
# Enforce that a value for scale_config_repo is set when enable_organization_runners is set to true.
# Setting the value is optional when not using organization runners since we'll default to the
# job's repository.
condition = var.enable_organization_runners == true ? var.scale_config_repo != "" : true
error_message = "scale_config_repo is required when enable_organization_runners is set to true"
}
}

environment {
variables = {
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
Expand Down
10 changes: 10 additions & 0 deletions terraform-aws-github-runner/modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ resource "aws_lambda_function" "scale_up" {
memory_size = 2048
publish = true

lifecycle {
precondition {
# Enforce that a value for scale_config_repo is set when enable_organization_runners is set to true.
# Setting the value is optional when not using organization runners since we'll default to the
# job's repository.
condition = var.enable_organization_runners == true ? var.scale_config_repo != "" : true
error_message = "scale_config_repo is required when enable_organization_runners is set to true"
}
}

environment {
variables = {
CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels)
Expand Down
4 changes: 2 additions & 2 deletions terraform-aws-github-runner/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ variable "cant_have_issues_labels" {
}

variable "scale_config_repo" {
description = "Repository to fetch scale config from if `enable_organization_runners` is set to true. Otherwise the job's repo will be used"
default = "" # Internally defaults to 'test-infra'
description = "Repository to fetch scale config from. Optional if `enable_organization_runners` is set to false, in which case the job's repo will be used"
default = ""
type = string
}

Expand Down

0 comments on commit dc78e2a

Please sign in to comment.