From 5f2a0bfca2ffd1f7913a2f7b7762f7f11b6de633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20de=20la=20Pe=C3=B1a?= Date: Thu, 16 Dec 2021 13:52:34 +0100 Subject: [PATCH] fix: add resiliency when an integration cannot be added to a policy (#1914) * chore: do not exit if the integration cannot be added to the policy * chore: add resiliency when adding an integration to a policy We have detected 500 codes in the staging environment of package-registry [2021-12-16T05:41:55.069Z] FATA[2021-12-16T05:41:53Z] Unable to add integration to policy err="could not add package to policy; API status code = 500; response body = {\"statusCode\":500,\"error\":\"Internal Server Error\",\"message\":\"'502 Bad Gateway' error response from package registry at https://epr-staging.elastic.co/package/endpoint/1.4.0\"}" packageDS="{ endpoint-50fff084-9495-4633-b1a7-0a817b61d6bc Endpoint Security default cdd3d370-5e32-11ec-9ece-534be1f506d0 true [] { endpoint Endpoint Security 1.4.0}}" * fix: add missing return * fix: format source (cherry picked from commit 66d399a06a9c5f2fea003670d15736e26a764361) --- e2e/_suites/fleet/fleet.go | 3 +- internal/kibana/integrations.go | 70 +++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/e2e/_suites/fleet/fleet.go b/e2e/_suites/fleet/fleet.go index 0976e0fec9..67062fda51 100644 --- a/e2e/_suites/fleet/fleet.go +++ b/e2e/_suites/fleet/fleet.go @@ -1010,7 +1010,8 @@ func theIntegrationIsOperatedInThePolicy(ctx context.Context, client *kibana.Cli log.WithFields(log.Fields{ "err": err, "packageDS": packageDataStream, - }).Fatal("Unable to add integration to policy") + }).Error("Unable to add integration to policy") + return err } } else if strings.ToLower(action) == actionREMOVED { packageDataStream, err := client.GetIntegrationFromAgentPolicy(ctx, integration.Name, policy) diff --git a/internal/kibana/integrations.go b/internal/kibana/integrations.go index 426b0d3ed7..058c3f52e9 100644 --- a/internal/kibana/integrations.go +++ b/internal/kibana/integrations.go @@ -5,8 +5,11 @@ import ( "encoding/json" "fmt" "strings" + "time" "github.com/Jeffail/gabs/v2" + "github.com/cenkalti/backoff/v4" + "github.com/elastic/e2e-testing/internal/utils" "github.com/pkg/errors" log "github.com/sirupsen/logrus" "go.elastic.co/apm" @@ -22,24 +25,67 @@ type IntegrationPackage struct { // AddIntegrationToPolicy adds an integration to policy func (c *Client) AddIntegrationToPolicy(ctx context.Context, packageDS PackageDataStream) error { - span, _ := apm.StartSpanOptions(ctx, "Adding integration to policy", "fleet.package.add-to-policy", apm.SpanOptions{ - Parent: apm.SpanFromContext(ctx).TraceContext(), - }) - defer span.End() + maxTimeout := time.Duration(utils.TimeoutFactor) * time.Minute + retryCount := 1 - reqBody, err := json.Marshal(packageDS) - if err != nil { - return errors.Wrap(err, "could not convert policy-package (request) to JSON") + exp := utils.GetExponentialBackOff(maxTimeout) + + addIntegrationFn := func() error { + span, _ := apm.StartSpanOptions(ctx, "Adding integration to policy", "fleet.package.add-to-policy", apm.SpanOptions{ + Parent: apm.SpanFromContext(ctx).TraceContext(), + }) + defer span.End() + + reqBody, err := json.Marshal(packageDS) + if err != nil { + log.WithFields(log.Fields{ + "elapsedTime": exp.GetElapsedTime(), + "err": err, + "package": packageDS, + "retry": retryCount, + }).Warn("Could not convert policy-package (request) to JSON. Retrying") + + retryCount++ + + return err + } + + statusCode, respBody, err := c.post(ctx, fmt.Sprintf("%s/package_policies", FleetAPI), reqBody) + if err != nil { + log.WithFields(log.Fields{ + "elapsedTime": exp.GetElapsedTime(), + "err": err, + "package": packageDS, + "retry": retryCount, + }).Warn("Could not add package to policy. Retrying") + + retryCount++ + + return err + } + + if statusCode != 200 { + log.WithFields(log.Fields{ + "elapsedTime": exp.GetElapsedTime(), + "err": err, + "statusCode": statusCode, + "response": respBody, + "package": packageDS, + "retry": retryCount, + }).Warn("could not add package to policy because of HTTP code is not 200") + + retryCount++ + return fmt.Errorf("could not add package to policy; API status code = %d; response body = %s", statusCode, respBody) + } + + return nil } - statusCode, respBody, err := c.post(ctx, fmt.Sprintf("%s/package_policies", FleetAPI), reqBody) + err := backoff.Retry(addIntegrationFn, exp) if err != nil { - return errors.Wrap(err, "could not add package to policy") + return err } - if statusCode != 200 { - return fmt.Errorf("could not add package to policy; API status code = %d; response body = %s", statusCode, respBody) - } return nil }