From dde451d96a4d7542329bd570cfa5cd3ebb4a2d3f Mon Sep 17 00:00:00 2001 From: Liu Shilong Date: Thu, 16 Feb 2023 21:58:14 +0800 Subject: [PATCH] [ci] Kill hanged docker build process to avoid build timeout issue. (#13726) Why I did it Docker build occasionally hangs. It hangs on different steps, so it looks like a bug in the Docker daemon. How I did it Start a daemon process that scans for docker build processes running for more than 1 hour and kills them. How to verify it --- .azure-pipelines/azure-pipelines-build.yml | 1 + .azure-pipelines/cleanup.yml | 7 +++++++ .azure-pipelines/template-daemon.yml | 24 ++++++++++++++++++++++ .azure-pipelines/template-variables.yml | 2 ++ 4 files changed, 34 insertions(+) create mode 100644 .azure-pipelines/template-daemon.yml diff --git a/.azure-pipelines/azure-pipelines-build.yml b/.azure-pipelines/azure-pipelines-build.yml index 4d8516b63afa..5e685d9a167f 100644 --- a/.azure-pipelines/azure-pipelines-build.yml +++ b/.azure-pipelines/azure-pipelines-build.yml @@ -98,6 +98,7 @@ jobs: buildSteps: - template: template-skipvstest.yml + - template: template-daemon.yml - bash: | set -ex if [ $(GROUP_NAME) == vs ]; then diff --git a/.azure-pipelines/cleanup.yml b/.azure-pipelines/cleanup.yml index 6569c8ed0bf3..4f74de0f413a 100644 --- a/.azure-pipelines/cleanup.yml +++ b/.azure-pipelines/cleanup.yml @@ -1,5 +1,11 @@ steps: - script: | + set -x + # kill daemon process + ps $(cat /tmp/azp_daemon_kill_docker_pid) + sudo kill $(cat /tmp/azp_daemon_kill_docker_pid) + rm /tmp/azp_daemon_kill_docker_pid + if sudo [ -f /var/run/march/docker.pid ] ; then pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid fi @@ -11,4 +17,5 @@ steps: pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid fi sudo rm -rf $(ls -A1) + condition: always() displayName: "Clean Workspace" diff --git a/.azure-pipelines/template-daemon.yml b/.azure-pipelines/template-daemon.yml new file mode 100644 index 000000000000..b0915557ced2 --- /dev/null +++ 
b/.azure-pipelines/template-daemon.yml
@@ -0,0 +1,24 @@
+# Watchdog: every 120 s, kill any `docker build` process that has been running
+# longer than DOCKER_BUILD_TIMEOUT seconds; the watchdog PID is saved to a file.
+steps:
+- bash: |
+    (
+      while true
+      do
+        sleep 120
+        # pid=/etimes=/args= suppress the header; etimes is elapsed seconds.
+        ps -C docker -o pid=,etimes=,args= | grep "docker build" |
+        while read -r pid time_s _
+        do
+          if [[ $time_s -gt $(DOCKER_BUILD_TIMEOUT) ]]; then
+            echo =========== $(date +%F%T) $time_s &>> target/daemon.log
+            ps "$pid" &>> target/daemon.log
+            sudo kill "$pid"
+          fi
+        done
+      done
+    ) &
+    daemon_pid=$!
+    ps $daemon_pid
+    echo $daemon_pid >> /tmp/azp_daemon_kill_docker_pid
+  displayName: start daemon to kill hang docker
diff --git a/.azure-pipelines/template-variables.yml b/.azure-pipelines/template-variables.yml
index 027dfee9986a..42d46182d263 100644
--- a/.azure-pipelines/template-variables.yml
+++ b/.azure-pipelines/template-variables.yml
@@ -4,3 +4,5 @@ variables:
   SONIC_SLAVE_DOCKER_DRIVER: 'overlay2'
   SONIC_BUILD_RETRY_COUNT: 3
   SONIC_BUILD_RETRY_INTERVAL: 600
+  DOCKER_BUILDKIT: 0
+  DOCKER_BUILD_TIMEOUT: 3600  # seconds a docker build may run before it is killed