diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index 43eaabf966..45a13a8aa4 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -191,8 +191,8 @@ runcmd: - echo `date`,`hostname`, aptinstall>>/opt/m - systemctl enable rpcbind - systemctl enable rpc-statd -- systemctl start rpcbind -- systemctl start rpc-statd +- retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart rpcbind +- retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart rpc-statd - echo `date`,`hostname`, predockerinstall>>/opt/m - retrycmd_if_failure_no_stats 180 1 5 curl -fsSL https://aptdocker.azureedge.net/gpg > /tmp/aptdocker.gpg - cat /tmp/aptdocker.gpg | apt-key add - @@ -204,7 +204,7 @@ runcmd: - echo "ExecStartPost=/sbin/iptables -P FORWARD ACCEPT" >> /etc/systemd/system/docker.service.d/exec_start.conf - systemctl daemon-reload - echo `date`,`hostname`, postdockerinstall>>/opt/m -- systemctl restart docker +- retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart docker - mkdir -p /etc/kubernetes/manifests - usermod -aG docker {{WrapAsVariable "username"}} {{if IsNSeriesSKU .}} diff --git a/parts/k8s/kubernetescustomscript.sh b/parts/k8s/kubernetescustomscript.sh index 6277f86884..96c1a8178c 100644 --- a/parts/k8s/kubernetescustomscript.sh +++ b/parts/k8s/kubernetescustomscript.sh @@ -249,8 +249,8 @@ function installClearContainersRuntime() { # Enable and start Clear Containers proxy service echo "Enabling and starting Clear Containers proxy service..." 
- systemctl enable cc-proxy - systemctl start cc-proxy + systemctlEnableAndCheck cc-proxy + retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart cc-proxy setKubeletOpts " --container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock" } @@ -290,7 +290,7 @@ function ensureContainerd() { # Make sure this is done after networking plugins are installed echo "Enabling and starting cri-containerd service..." systemctl enable containerd - systemctl start containerd + retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart containerd fi fi } @@ -321,23 +321,7 @@ function ensureDocker() { systemctlEnableAndCheck docker # only start if a reboot is not required if ! $REBOOTREQUIRED; then - dockerStarted=1 - for i in {1..900}; do - if ! timeout 10s $DOCKER info; then - echo "status $?" - timeout 60s /bin/systemctl restart docker - else - echo "docker started, took $i seconds" - dockerStarted=0 - break - fi - sleep 1 - done - if [ $dockerStarted -ne 0 ] - then - echo "docker did not start" - exit 2 - fi + retrycmd_if_failure 900 1 60 systemctl daemon-reload && retrycmd_if_failure 900 1 60 systemctl restart docker || { echo "docker did not start"; exit 2; } fi } @@ -345,7 +329,7 @@ function ensureKubelet() { systemctlEnableAndCheck kubelet # only start if a reboot is not required if ! $REBOOTREQUIRED; then - systemctl restart kubelet + retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart kubelet fi } @@ -354,7 +338,7 @@ function extractHyperkube(){ systemctlEnableAndCheck hyperkube-extract # only start if a reboot is not required if ! $REBOOTREQUIRED; then - systemctl restart hyperkube-extract + retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart hyperkube-extract fi } @@ -367,7 +351,7 @@ function ensureJournal(){ echo "ForwardToSyslog=no" >> /etc/systemd/journald.conf # only start if a reboot is not required if ! 
$REBOOTREQUIRED; then - systemctl restart systemd-journald.service + retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart systemd-journald.service fi } diff --git a/parts/k8s/kubernetesmastercustomdata.yml b/parts/k8s/kubernetesmastercustomdata.yml index 0baa1f3271..9c9a1d7fdb 100644 --- a/parts/k8s/kubernetesmastercustomdata.yml +++ b/parts/k8s/kubernetesmastercustomdata.yml @@ -382,7 +382,7 @@ runcmd: - /bin/chown -R etcd:etcd /var/lib/etcddisk - systemctl stop etcd - systemctl daemon-reload -- systemctl restart etcd +- retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart etcd - MEMBER="$(sudo etcdctl member list | grep -E {{WrapAsVerbatim "variables('masterVMNames')[copyIndex(variables('masterOffset'))]"}} | cut -d{{WrapAsVariable "singleQuote"}}:{{WrapAsVariable "singleQuote"}} -f 1)" - sudo etcdctl member update ${MEMBER} {{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}} - retrycmd_if_failure 5 5 10 curl --cacert /etc/kubernetes/certs/ca.crt --cert /etc/kubernetes/certs/etcdclient.crt --key /etc/kubernetes/certs/etcdclient.key --retry 5 --retry-delay 10 --retry-max-time 10 --max-time 60 "{{WrapAsVerbatim "variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))]"}}"/v2/machines @@ -397,7 +397,7 @@ runcmd: - touch /opt/azure/containers/dockerinstall.complete - echo "ExecStartPost=/sbin/iptables -P FORWARD ACCEPT" >> /etc/systemd/system/docker.service.d/exec_start.conf - systemctl daemon-reload -- systemctl restart docker +- retrycmd_if_failure 100 1 10 systemctl daemon-reload && retrycmd_if_failure 100 1 10 systemctl restart docker - mkdir -p /etc/kubernetes/manifests - usermod -aG docker {{WrapAsVariable "username"}} - /usr/lib/apt/apt.systemd.daily