From b8131116c8c968134cb20666fb83939f78135d46 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 23 Feb 2023 11:39:29 -0500 Subject: [PATCH] jobs/build: stop waiting for multi-arch jobs to take lock The main reason we added that was because in the new "rerun build-arch and release jobs" path, there was a higher likelihood that the release job could in theory take the locks before the build-arch jobs. But with 0664cd6 ("jobs/build: wait when re-running mArch jobs"), this is no longer a concern. There's still the theoretical possibility the race happens even in the regular path (especially when `EARLY_ARCH_JOBS` is unset), but (1) something must be really slow in the multi-arch jobs for that to happen (in which case, it might end up taking more than our 5 minute timeout anyway) and (2) the worst case is that we release without that arch before it's built, which is salvageable (by rerunning the release job). So overall, IMO maintaining this code is not worth the complexity. We can always bring it back and adjust the timeout if this is a recurring issue. --- jobs/build.Jenkinsfile | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/jobs/build.Jenkinsfile b/jobs/build.Jenkinsfile index ccbce3480..f5a475588 100644 --- a/jobs/build.Jenkinsfile +++ b/jobs/build.Jenkinsfile @@ -1,5 +1,4 @@ import org.yaml.snakeyaml.Yaml; -import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; node { checkout scm @@ -522,16 +521,6 @@ def run_multiarch_jobs(arches, src_commit, version, cosa_img, wait) { string(name: 'PIPECFG_HOTFIX_REPO', value: params.PIPECFG_HOTFIX_REPO), string(name: 'PIPECFG_HOTFIX_REF', value: params.PIPECFG_HOTFIX_REF) ] - if (!wait) { - // Wait until the locks taken by the `build-arch` jobs are taken - // before continuing. This closes a potential race in which once we - // trigger the `release` job afterwards, it could end up taking the - // locks before the multi-arch jobs. - // This really should never take more than 5 minutes. Having a - // timeout ensures we don't wait for a long time if we somehow - // missed the transition. - wait_until_locked_or_continue("release-${version}-${arch}", 5) - } }]} } } @@ -552,24 +541,3 @@ def run_release_job(buildID) { ] } } - -// XXX: generalize and put in coreos-ci-lib eventually -def wait_until_locked_or_continue(resource, timeout_mins) { - try { - timeout(time: timeout_mins, unit: 'MINUTES') { - waitUntil { - lock(resource: resource, skipIfLocked: true) { - return false - } - return true - } - } - } catch (FlowInterruptedException e) { - // If the lock was still not taken, then something went wrong. For - // example, the job might've failed during the initial `git clone`. The - // timeout is to ensure we don't wait forever and here we continue to - // try to at least release for the arches that did succeed. We may be - // able to salvage the failed arch in the next run. - echo "Timed out waiting for lock ${resource} to be taken. Continuing..." - } -}