From d0e092168557de0cdeb097a10d28f0d8ea6f3c9b Mon Sep 17 00:00:00 2001 From: Dimitrios Liappis Date: Wed, 1 May 2024 12:52:54 +0300 Subject: [PATCH] More resilient DRA packaging Occasionally, packaging steps in the DRA pipeline may get stuck[^1]. This breaches the global pipeline timeout (currently 1 hour) and cancels the job. This commit increases the global timeout to 90 minutes, adds one automatic retry per step, and limits the runtime of each step to 40 minutes (so that a single stuck step doesn't exhaust the entire global timeout). Finally, we suppress Slack notifications if the retry recovered the step. In a future PR we will also consider adding a daily DRA build to cover cases where the retries didn't help and there were no subsequent commits to trigger a new build. [^1]: https://buildkite.com/elastic/beats-packaging-pipeline/builds/114 --- .buildkite/packaging.pipeline.yml | 32 +++++++++++++++++++++++++++++++ catalog-info.yaml | 3 ++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.buildkite/packaging.pipeline.yml b/.buildkite/packaging.pipeline.yml index c01428100ec..5fd559f458d 100644 --- a/.buildkite/packaging.pipeline.yml +++ b/.buildkite/packaging.pipeline.yml @@ -44,6 +44,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -62,6 +66,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -86,6 +94,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -116,6 +128,10 @@ steps: provider: "aws" imagePrefix: 
"${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -142,6 +158,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* @@ -161,6 +181,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -191,6 +215,10 @@ steps: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -217,6 +245,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* diff --git a/catalog-info.yaml b/catalog-info.yaml index 420d9c1c16a..56b697fa6ee 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -1045,7 +1045,7 @@ spec: # branch_configuration: "main 8.* 7.17" cancel_intermediate_builds: false skip_intermediate_builds: false - maximum_timeout_in_minutes: 60 + maximum_timeout_in_minutes: 90 provider_settings: build_branches: true build_pull_request_forks: false @@ -1059,6 +1059,7 @@ spec: ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true' SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications' SLACK_NOTIFICATIONS_ON_SUCCESS: 'false' + SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true' teams: ingest-fp: access_level: MANAGE_BUILD_AND_READ