From db375170d6dd2eba32d2ddbed012ccb73e5efc55 Mon Sep 17 00:00:00 2001 From: Stuart Douglas Date: Wed, 18 Sep 2024 06:35:48 +1000 Subject: [PATCH] fix: restore kube health check With the recent changes to the protocol and to routing, there is no real need for the controller to ping the runner. This ping was causing the Istio RBAC issues that resulted in us disabling the health check. fixes: #2710 --- backend/controller/controller.go | 23 +---------------------- backend/runner/runner.go | 1 + 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/backend/controller/controller.go b/backend/controller/controller.go index aa01e800b3..0941afba91 100644 --- a/backend/controller/controller.go +++ b/backend/controller/controller.go @@ -548,7 +548,7 @@ func (s *Service) ReplaceDeploy(ctx context.Context, c *connect.Request[ftlv1.Re } func (s *Service) RegisterRunner(ctx context.Context, stream *connect.ClientStream[ftlv1.RegisterRunnerRequest]) (*connect.Response[ftlv1.RegisterRunnerResponse], error) { - initialised := false + deferredDeregistration := false logger := log.FromContext(ctx) @@ -569,14 +569,6 @@ func (s *Service) RegisterRunner(ctx context.Context, stream *connect.ClientStre runnerStr := fmt.Sprintf("%s (%s)", endpoint, runnerKey) logger.Tracef("Heartbeat received from runner %s", runnerStr) - if !initialised { - err = s.pingRunner(ctx, endpoint) - if err != nil { - return nil, fmt.Errorf("runner callback failed: %w", err) - } - initialised = true - } - deploymentKey, err := model.ParseDeploymentKey(msg.Deployment) if err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) @@ -613,19 +605,6 @@ func (s *Service) RegisterRunner(ctx context.Context, stream *connect.ClientStre return connect.NewResponse(&ftlv1.RegisterRunnerResponse{}), nil } -// Check if we can contact the runner. 
-func (s *Service) pingRunner(ctx context.Context, endpoint *url.URL) error { - client := rpc.Dial(ftlv1connect.NewVerbServiceClient, endpoint.String(), log.Error) - retry := backoff.Backoff{} - heartbeatCtx, cancel := context.WithTimeout(ctx, s.config.RunnerTimeout) - defer cancel() - err := rpc.Wait(heartbeatCtx, retry, client) - if err != nil { - return connect.NewError(connect.CodeUnavailable, fmt.Errorf("failed to connect to runner: %w", err)) - } - return nil -} - func (s *Service) GetDeployment(ctx context.Context, req *connect.Request[ftlv1.GetDeploymentRequest]) (*connect.Response[ftlv1.GetDeploymentResponse], error) { deployment, err := s.getDeployment(ctx, req.Msg.DeploymentKey) if err != nil { diff --git a/backend/runner/runner.go b/backend/runner/runner.go index 3ef8c4a749..cbd6d02cdd 100644 --- a/backend/runner/runner.go +++ b/backend/runner/runner.go @@ -132,6 +132,7 @@ func Start(ctx context.Context, config Config) error { return rpc.Serve(ctx, config.Bind, rpc.GRPC(ftlv1connect.NewVerbServiceHandler, svc), rpc.HTTP("/", svc), + rpc.HealthCheck(svc.healthCheck), ) }