From 726f6e9bdec715f958ba47500e77feb5655b0a48 Mon Sep 17 00:00:00 2001 From: Dimitrios Liappis Date: Wed, 1 May 2024 18:04:21 +0300 Subject: [PATCH] More resilient DRA packaging (#39332) Occasionally packaging steps from the DRA pipeline may get stuck[^1]. This causes a breach of the global pipeline timeout (currently 1hr) and cancels the job. This commit increases the global timeout to 90min, adds one retry per step and limits the runtime per step to 40min (so that a single stuck step doesn't exhaust the entire global timeout). Finally, we shush slack notifications if the retry recovered the step. In a future PR we will consider also adding a daily DRA build to cover for cases where the retries didn't help and there were no subsequent commits to trigger a new build. [^1]: https://buildkite.com/elastic/beats-packaging-pipeline/builds/114 --- .buildkite/packaging.pipeline.yml | 32 +++++++++++++++++++++++++++++++ catalog-info.yaml | 3 ++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.buildkite/packaging.pipeline.yml b/.buildkite/packaging.pipeline.yml index c01428100ecf..5fd559f458d3 100644 --- a/.buildkite/packaging.pipeline.yml +++ b/.buildkite/packaging.pipeline.yml @@ -44,6 +44,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -62,6 +66,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -86,6 +94,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -116,6 +128,10 @@ steps: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -142,6 +158,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* @@ -161,6 +181,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -191,6 +215,10 @@ steps: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -217,6 +245,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* diff --git a/catalog-info.yaml b/catalog-info.yaml index f81d3a6df1a0..34d9e397ca3e 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -1045,7 +1045,7 @@ spec: # branch_configuration: "main 8.* 7.17" cancel_intermediate_builds: false skip_intermediate_builds: false - maximum_timeout_in_minutes: 60 + maximum_timeout_in_minutes: 90 provider_settings: build_branches: true build_pull_request_forks: false @@ -1059,6 +1059,7 @@ spec: ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true' SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications' SLACK_NOTIFICATIONS_ON_SUCCESS: 'false' + SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true' teams: ingest-fp: access_level: MANAGE_BUILD_AND_READ