From 5d00626f03fc097f9900b6c02c19960b79fb669e Mon Sep 17 00:00:00 2001 From: Brandon Duffany Date: Wed, 21 Feb 2024 15:47:32 -0500 Subject: [PATCH] Add initial GitHub Actions runner support --- .../rbe-ubuntu20-04-github-actions/Dockerfile | 17 ++ enterprise/server/githubapp/BUILD | 6 + enterprise/server/githubapp/githubapp.go | 163 +++++++++++++++++- enterprise/server/githubapp/runner.sh | 39 +++++ .../containers/firecracker/firecracker.go | 2 +- .../remote_execution/executor/executor.go | 3 +- .../remote_execution/platform/platform.go | 14 +- .../scheduling/task_router/task_router.go | 2 +- 8 files changed, 234 insertions(+), 12 deletions(-) create mode 100644 enterprise/dockerfiles/rbe-ubuntu20-04-github-actions/Dockerfile create mode 100644 enterprise/server/githubapp/runner.sh diff --git a/enterprise/dockerfiles/rbe-ubuntu20-04-github-actions/Dockerfile b/enterprise/dockerfiles/rbe-ubuntu20-04-github-actions/Dockerfile new file mode 100644 index 000000000000..0a5e138db180 --- /dev/null +++ b/enterprise/dockerfiles/rbe-ubuntu20-04-github-actions/Dockerfile @@ -0,0 +1,17 @@ +# TODO: build a VM image from +# https://github.com/actions/runner-images/tree/main/images/ubuntu + +FROM gcr.io/flame-public/rbe-ubuntu20-04-workflows@sha256:271e5e3704d861159c75b8dd6713dbe5a12272ec8ee73d17f89ed7be8026553f + +# Install GitHub Actions runner and required system dependencies +RUN apt-get update && \ + apt-get install -y curl && \ + mkdir /actions-runner && \ + cd /actions-runner && \ + VERSION=2.313.0 && \ + curl -O -L https://github.com/actions/runner/releases/download/v${VERSION}/actions-runner-linux-x64-${VERSION}.tar.gz && \ + tar xzf ./actions-runner-linux-x64-${VERSION}.tar.gz && \ + rm ./actions-runner-linux-x64-${VERSION}.tar.gz && \ + ./bin/installdependencies.sh && \ + chown -R 1000:1000 /actions-runner && \ + apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/enterprise/server/githubapp/BUILD b/enterprise/server/githubapp/BUILD index 
5df07f293c6b..128be400877e 100644 --- a/enterprise/server/githubapp/BUILD +++ b/enterprise/server/githubapp/BUILD @@ -5,11 +5,14 @@ package(default_visibility = ["//enterprise:__subpackages__"]) go_library( name = "githubapp", srcs = ["githubapp.go"], + embedsrcs = ["runner.sh"], importpath = "github.com/buildbuddy-io/buildbuddy/enterprise/server/githubapp", deps = [ + "//enterprise/server/remote_execution/platform", "//enterprise/server/webhooks/github", "//enterprise/server/webhooks/webhook_data", "//proto:github_go_proto", + "//proto:remote_execution_go_proto", "//proto:repo_go_proto", "//proto:workflow_go_proto", "//server/backends/github", @@ -24,8 +27,10 @@ go_library( "//server/util/log", "//server/util/perms", "//server/util/retry", + "//server/util/rexec", "//server/util/scratchspace", "//server/util/status", + "//server/util/uuid", "@com_github_go_git_go_git_v5//:go-git", "@com_github_go_git_go_git_v5//config", "@com_github_go_git_go_git_v5//plumbing", @@ -34,6 +39,7 @@ go_library( "@com_github_go_git_go_git_v5//plumbing/transport/http", "@com_github_golang_jwt_jwt//:jwt", "@com_github_google_go_github_v59//github", + "@org_golang_google_protobuf//types/known/durationpb", "@org_golang_x_oauth2//:oauth2", "@org_golang_x_sync//errgroup", ], diff --git a/enterprise/server/githubapp/githubapp.go b/enterprise/server/githubapp/githubapp.go index f925cf29b6d2..47c12f3aa780 100644 --- a/enterprise/server/githubapp/githubapp.go +++ b/enterprise/server/githubapp/githubapp.go @@ -15,10 +15,15 @@ import ( "os" "path/filepath" "regexp" + "slices" + "sort" "strings" "sync" "time" + _ "embed" + + "github.com/buildbuddy-io/buildbuddy/enterprise/server/remote_execution/platform" "github.com/buildbuddy-io/buildbuddy/enterprise/server/webhooks/webhook_data" "github.com/buildbuddy-io/buildbuddy/server/environment" "github.com/buildbuddy-io/buildbuddy/server/interfaces" @@ -30,8 +35,10 @@ import ( "github.com/buildbuddy-io/buildbuddy/server/util/log" 
"github.com/buildbuddy-io/buildbuddy/server/util/perms" "github.com/buildbuddy-io/buildbuddy/server/util/retry" + "github.com/buildbuddy-io/buildbuddy/server/util/rexec" "github.com/buildbuddy-io/buildbuddy/server/util/scratchspace" "github.com/buildbuddy-io/buildbuddy/server/util/status" + "github.com/buildbuddy-io/buildbuddy/server/util/uuid" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" @@ -40,9 +47,11 @@ import ( "github.com/google/go-github/v59/github" "golang.org/x/oauth2" "golang.org/x/sync/errgroup" + "google.golang.org/protobuf/types/known/durationpb" gh_webhooks "github.com/buildbuddy-io/buildbuddy/enterprise/server/webhooks/github" ghpb "github.com/buildbuddy-io/buildbuddy/proto/github" + repb "github.com/buildbuddy-io/buildbuddy/proto/remote_execution" rppb "github.com/buildbuddy-io/buildbuddy/proto/repo" wfpb "github.com/buildbuddy-io/buildbuddy/proto/workflow" gh_oauth "github.com/buildbuddy-io/buildbuddy/server/backends/github" @@ -60,6 +69,10 @@ var ( privateKey = flag.String("github.app.private_key", "", "GitHub app private key.", flag.Secret) webhookSecret = flag.String("github.app.webhook_secret", "", "GitHub app webhook secret used to verify that webhook payload contents were sent by GitHub.", flag.Secret) + actionsRunnerEnabled = flag.Bool("github.app.workflows.runner_enabled", false, "Whether to enable the buildbuddy-hosted runner for GitHub actions.") + actionsRunnerExtraLabels = flag.Slice("github.app.workflows.runner_extra_labels", []string{}, "Extra labels to apply to the buildbuddy-hosted runner, in addition to 'buildbuddy' (ex: 'dev').") + actionsPoolName = flag.String("github.app.workflows.runner_pool_name", "", "Executor pool name to use for GitHub actions runner.") + validPathRegex = regexp.MustCompile(`^[a-zA-Z0-9/_-]*$`) ) @@ -68,8 +81,23 @@ const ( // Max page size that GitHub allows for list requests. 
githubMaxPageSize = 100 + + // Actions have to have this "runs-on" label in order to be run on + // BuildBuddy, in addition to actionsRunnerExtraLabels. + runnerPrimaryLabel = "buildbuddy" + + // How long an ephemeral GitHub actions runner task should wait without + // being assigned a job before it terminates. + runnerIdleTimeout = 5 * time.Minute + + // Max amount of time that a runner is allowed to run for until it is + // killed. This is just a safeguard for now; we eventually should remove it. + runnerTimeout = 8 * time.Hour ) +//go:embed runner.sh +var runnerScript string + func Register(env *real_environment.RealEnv) error { if !*enabled { return nil @@ -102,6 +130,10 @@ type GitHubApp struct { // privateKey is the GitHub-issued private key for the app. It is used to // create JWTs for authenticating with GitHub as the app itself. privateKey *rsa.PrivateKey + + // runnerLabels contains the set of labels that must be matched by a queued + // job in order to spawn a runner on BuildBuddy. + runnerLabels []string } // New returns a new GitHubApp handle. @@ -129,9 +161,15 @@ func New(env environment.Env) (*GitHubApp, error) { return nil, err } + var runnerLabels []string + runnerLabels = append(runnerLabels, runnerPrimaryLabel) + runnerLabels = append(runnerLabels, *actionsRunnerExtraLabels...) 
+ sort.Strings(runnerLabels) + app := &GitHubApp{ - env: env, - privateKey: privateKey, + env: env, + privateKey: privateKey, + runnerLabels: runnerLabels, } oauth := gh_oauth.NewOAuthHandler(env, *clientID, *clientSecret, oauthAppPath) oauth.HandleInstall = app.handleInstall @@ -174,8 +212,10 @@ func (a *GitHubApp) handleWebhookEvent(ctx context.Context, eventType string, ev switch event := event.(type) { case *github.InstallationEvent: return a.handleInstallationEvent(ctx, eventType, event) + case *github.WorkflowJobEvent: + return a.handleWorkflowJobEvent(ctx, eventType, event) default: - return a.handleWorkflowEvent(ctx, eventType, event) + return a.handleBuildBuddyWorkflowEvent(ctx, eventType, event) } } @@ -204,7 +244,122 @@ func (a *GitHubApp) handleInstallationEvent(ctx context.Context, eventType strin return nil } -func (a *GitHubApp) handleWorkflowEvent(ctx context.Context, eventType string, event any) error { +func (a *GitHubApp) handleWorkflowJobEvent(ctx context.Context, eventType string, event *github.WorkflowJobEvent) error { + if !*actionsRunnerEnabled { + return nil + } + + // If this is a queued event that requests the "buildbuddy" label and only + // labels our runner provides, run the job on one of BuildBuddy's runners. + if event.GetAction() == "queued" { + var labels []string + if event.WorkflowJob != nil { + labels = event.WorkflowJob.Labels + } + if slices.Contains(labels, runnerPrimaryLabel) && a.matchesRunnerLabels(labels) { + return a.startWorkflowJob(ctx, event) + } + } + return nil +} + +func (a *GitHubApp) matchesRunnerLabels(labels []string) bool { + for _, label := range labels { + if !slices.Contains(a.runnerLabels, label) { + return false + } + } + return true +} + +func (a *GitHubApp) startWorkflowJob(ctx context.Context, event *github.WorkflowJobEvent) error { + if event.WorkflowJob == nil { + return status.FailedPreconditionError("workflow job cannot be nil") + } + + // Get an installation client. 
+ tok, err := a.createInstallationToken(ctx, event.GetInstallation().GetID()) + if err != nil { + return err + } + client, err := a.newAuthenticatedClient(ctx, tok.GetToken()) + if err != nil { + return err + } + // Register a "just-in-time" runner config for the incoming queued job, with + // the same labels as the queued job. This lets us start a runner instance + // that is authorized to execute a single job within the repo. + // + // TODO: once https://github.com/actions/runner/issues/620 is fixed, + // restrict the runner to the exact job ID that was queued. + runnerName := uuid.New() + req := &github.GenerateJITConfigRequest{ + Name: runnerName, + RunnerGroupID: 1, // "default" group ID + Labels: a.runnerLabels, + } + jitRunnerConfig, res, err := client.Actions.GenerateRepoJITConfig(ctx, event.GetRepo().GetOwner().GetLogin(), event.GetRepo().GetName(), req) + if err := checkResponse(res, err); err != nil { + return err + } + // Spawn an ephemeral runner action on RBE. + // See the runner script in ./runner.sh + cmd := &repb.Command{ + Arguments: []string{"bash", "-ec", runnerScript}, + EnvironmentVariables: []*repb.Command_EnvironmentVariable{ + {Name: "HOME", Value: "/home/buildbuddy"}, + {Name: "PATH", Value: "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"}, + {Name: "RUNNER_IDLE_TIMEOUT", Value: fmt.Sprintf("%d", int(runnerIdleTimeout.Seconds()))}, + }, + Platform: &repb.Platform{ + // TODO: make more of these configurable (via both the workflow YAML + // and flags) + Properties: []*repb.Platform_Property{ + {Name: "container-image", Value: "docker://" + platform.Ubuntu20_04GitHubActionsImage}, + {Name: "dockerUser", Value: "buildbuddy"}, + {Name: "EstimatedComputeUnits", Value: "3"}, + {Name: "EstimatedFreeDiskBytes", Value: "20GB"}, + {Name: "github-actions-runner-labels", Value: strings.Join(a.runnerLabels, ",")}, + {Name: "init-dockerd", Value: "true"}, + {Name: "Pool", Value: *actionsPoolName}, + {Name: "recycle-runner", Value: "true"}, + {Name: 
"runner-recycling-max-wait", Value: "3s"}, + {Name: "workload-isolation-type", Value: "firecracker"}, + }, + }, + } + // Set jitconfig as env var via remote header to avoid storing it in CAS. + ctx = platform.WithRemoteHeaderOverride( + ctx, platform.EnvOverridesPropertyName, + "RUNNER_ENCODED_JITCONFIG="+jitRunnerConfig.GetEncodedJITConfig()) + + action := &repb.Action{ + DoNotCache: true, + Timeout: durationpb.New(runnerTimeout), + } + // TODO: respect GitRepository.instance_name_suffix, and allow manual cache + // busting via the UI by setting instance_name_suffix on the GitRepository + // row. + instanceName := "" + arn, err := rexec.Prepare(ctx, a.env, instanceName, repb.DigestFunction_SHA256, action, cmd, "" /*=inputRoot*/) + if err != nil { + return status.WrapError(err, "prepare runner action") + } + stream, err := rexec.Start(ctx, a.env, arn) + if err != nil { + return status.WrapError(err, "start runner execution") + } + op, err := stream.Recv() + if err != nil { + return status.WrapError(err, "wait for runner execution to be accepted") + } + log.CtxInfof(ctx, "Started ephemeral GitHub Actions runner execution %s", op.GetName()) + // Note: we don't wait for execution here; the RBE system is responsible for + // driving the action to completion at this point. + return nil +} + +func (a *GitHubApp) handleBuildBuddyWorkflowEvent(ctx context.Context, eventType string, event any) error { wd, err := gh_webhooks.ParseWebhookData(event) if err != nil { return err diff --git a/enterprise/server/githubapp/runner.sh b/enterprise/server/githubapp/runner.sh new file mode 100644 index 000000000000..c02acbec18b5 --- /dev/null +++ b/enterprise/server/githubapp/runner.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -e + +# Turn on job control so that each background job starts in its own process +# group, so we can clean up these jobs more easily. +set -m + +cd /actions-runner + +# When this script exits, clean up all background jobs. 
+trap ' + jobs -p | while read -r PID; do + echo >&2 "Cleaning up process group $PID" + kill -TERM -- -$PID || true + done +' EXIT + +# Start the runner and redirect its output to a log file. +RUNNER_LOG=/tmp/runner.log +truncate --size=0 "$RUNNER_LOG" +( + ./run.sh --jitconfig "$RUNNER_ENCODED_JITCONFIG" 2>&1 | + tee "$RUNNER_LOG" >&2 + echo >&2 'Runner exited.' +) & +RUNNER_PID=$! + +# Kill the runner if it idles for too long. +( + if ! timeout "$RUNNER_IDLE_TIMEOUT" sh -c " + tail -n+1 --follow \"$RUNNER_LOG\" 2>/dev/null | grep -q -m1 'Running job:' + "; then + echo >&2 "Runner did not pick up job within ${RUNNER_IDLE_TIMEOUT}s; killing." + kill -KILL -- -$RUNNER_PID + fi +) & + +wait "$RUNNER_PID" +echo >&2 "Finished waiting for runner." diff --git a/enterprise/server/remote_execution/containers/firecracker/firecracker.go b/enterprise/server/remote_execution/containers/firecracker/firecracker.go index afd4d0589617..f124f37e9ddb 100644 --- a/enterprise/server/remote_execution/containers/firecracker/firecracker.go +++ b/enterprise/server/remote_execution/containers/firecracker/firecracker.go @@ -589,7 +589,7 @@ func NewContainer(ctx context.Context, env environment.Env, task *repb.Execution c.vmIdx = opts.ForceVMIdx } - c.supportsRemoteSnapshots = *snaputil.EnableRemoteSnapshotSharing && (platform.IsCIRunner(task.GetCommand().GetArguments()) || *forceRemoteSnapshotting) + c.supportsRemoteSnapshots = *snaputil.EnableRemoteSnapshotSharing && (platform.IsCICommand(task.GetCommand()) || *forceRemoteSnapshotting) if opts.SavedState == nil { c.vmConfig.DebugMode = *debugTerminal diff --git a/enterprise/server/remote_execution/executor/executor.go b/enterprise/server/remote_execution/executor/executor.go index 94d49067d7ca..b228bc75ecfd 100644 --- a/enterprise/server/remote_execution/executor/executor.go +++ b/enterprise/server/remote_execution/executor/executor.go @@ -123,8 +123,7 @@ func isTaskMisconfigured(err error) bool { func isClientBazel(task *repb.ExecutionTask) bool 
{ // TODO(bduffany): Find a more reliable way to determine this. - args := task.GetCommand().GetArguments() - return !platform.IsCIRunner(args) + return !platform.IsCICommand(task.GetCommand()) } func shouldRetry(task *repb.ExecutionTask, taskError error) bool { diff --git a/enterprise/server/remote_execution/platform/platform.go b/enterprise/server/remote_execution/platform/platform.go index 60424fcd038d..f9cbba2f72ef 100644 --- a/enterprise/server/remote_execution/platform/platform.go +++ b/enterprise/server/remote_execution/platform/platform.go @@ -42,6 +42,8 @@ const ( Ubuntu18_04WorkflowsImage = "gcr.io/flame-public/buildbuddy-ci-runner@sha256:8cf614fc4695789bea8321446402e7d6f84f6be09b8d39ec93caa508fa3e3cfc" Ubuntu20_04WorkflowsImage = "gcr.io/flame-public/rbe-ubuntu20-04-workflows@sha256:271e5e3704d861159c75b8dd6713dbe5a12272ec8ee73d17f89ed7be8026553f" + Ubuntu20_04GitHubActionsImage = "gcr.io/flame-public/rbe-ubuntu20-04-github-actions@sha256:2a3b50fa1aafcb8446c94ab5707270f92fa91abd64a0e049312d4a086d0abb1c" + // overrideHeaderPrefix is a prefix used to override platform props via // remote headers. The property name immediately follows the prefix in the // header key, and the header value is used as the property value. @@ -617,10 +619,14 @@ func DefaultImage() string { return *defaultImage } -// The CI runner is used to run bazel remotely. It is used for workflows and remote -// bazel -func IsCIRunner(cmdArgs []string) bool { - if len(cmdArgs) > 0 && cmdArgs[0] == "./buildbuddy_ci_runner" { +// IsCICommand returns whether the given command is either a BuildBuddy workflow +// or a GitHub Actions runner task. These commands are longer-running and may +// themselves invoke bazel. 
+func IsCICommand(cmd *repb.Command) bool { + if len(cmd.GetArguments()) > 0 && cmd.GetArguments()[0] == "./buildbuddy_ci_runner" { + return true + } + if FindValue(cmd.GetPlatform(), "github-actions-runner-labels") != "" { return true } return false diff --git a/enterprise/server/scheduling/task_router/task_router.go b/enterprise/server/scheduling/task_router/task_router.go index 2284cac14b38..19a381aa9bd6 100644 --- a/enterprise/server/scheduling/task_router/task_router.go +++ b/enterprise/server/scheduling/task_router/task_router.go @@ -275,7 +275,7 @@ func (runnerRecycler) routingKey(params routingParams) (string, error) { // For workflow tasks, route using GIT_BRANCH so that when re-running the // workflow multiple times using the same branch, the runs are more likely // to hit an executor with a warmer snapshot cache. - if platform.IsCIRunner(params.cmd.GetArguments()) { + if platform.IsCICommand(params.cmd) { branch := "" for _, envVar := range params.cmd.EnvironmentVariables { if envVar.GetName() == "GIT_BRANCH" {