Skip to content

Commit

Permalink
bug: enforce each addon manager pod (#99)
Browse files Browse the repository at this point in the history
  • Loading branch information
jadarsie committed Mar 9, 2023
1 parent e1ba93e commit 03a2c5e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
15 changes: 11 additions & 4 deletions parts/k8s/cloud-init/artifacts/cse_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -423,16 +423,23 @@ fi
}

# ensureKubeAddonManager waits for the kube-addon-manager pod to report Ready,
# restarting kubelet when a wait fails, and calls exit_cse with the
# ERR_ADDONS_START_FAIL code if the last wait also fails.
# NOTE(review): this listing is a rendered diff without +/- markers, so two
# pre-commit lines sit directly above their replacements (flagged below);
# read literally, the if/fi pairs do not balance.
ensureKubeAddonManager() {
# Pod name is derived from HOSTNAME; the kam_pod waits below address the pod by name.
local kam_pod=kube-addon-manager-${HOSTNAME}
{{/* Wait 30 sec for kube-addon-manager to become Ready */}}
# retrycmd arguments are presumably retries / sleep seconds / timeout (6 x 5s = 30s) - confirm against its definition.
if ! retrycmd 6 5 30 ${KUBECTL} wait --for=condition=Ready --timeout=5s po ${kam_pod} -n kube-system; then
{{/* Restart kubelet if kube-addon-manager is not Ready after timeout */}}
systemctl_restart 3 5 30 kubelet
fi
{{/* Wait 5 mins for kube-addon-manager to become Ready */}}
# NOTE(review): the next line (label selector) appears to be the pre-commit version of the line after it.
if ! retrycmd 60 5 30 ${KUBECTL} wait --for=condition=Ready --timeout=5s -l app=kube-addon-manager po -n kube-system; then
if ! retrycmd 60 5 30 ${KUBECTL} wait --for=condition=Ready --timeout=5s po ${kam_pod} -n kube-system; then
{{/* Restart kubelet if kube-addon-manager is not Ready after 5 mins */}}
systemctl_restart 3 5 30 kubelet
{{/* Wait 5 more mins for kube-addon-manager to become Ready, and then return failure if not */}}
# NOTE(review): the next line (label selector) appears to be the pre-commit version of the line after it.
retrycmd 60 5 30 ${KUBECTL} wait --for=condition=Ready --timeout=5s -l app=kube-addon-manager po -n kube-system || exit_cse {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}} $GET_KUBELET_LOGS
retrycmd 60 5 30 ${KUBECTL} wait --for=condition=Ready --timeout=5s po ${kam_pod} -n kube-system || exit_cse {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}} $GET_KUBELET_LOGS
fi
}

ensureAddons() {
local kam_pod=kube-addon-manager-${HOSTNAME}
{{- if IsDashboardAddonEnabled}} {{/* Note: dashboard addon is deprecated */}}
retrycmd 120 5 30 $KUBECTL get namespace kubernetes-dashboard || exit_cse {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}} $GET_KUBELET_LOGS
{{- end}}
Expand All @@ -446,8 +453,8 @@ ensureAddons() {
rm -Rf ${ADDONS_DIR}/init
ensureKubeAddonManager
{{/* Manually delete any kube-addon-manager pods that point to the init directory */}}
for initPod in $(${KUBECTL} get pod -l app=kube-addon-manager -n kube-system -o json | jq -r '.items[] | select(.spec.containers[0].env[] | select(.value=="/etc/kubernetes/addons/init")) | select(.status.phase=="Running") .metadata.name'); do
retrycmd 120 5 30 ${KUBECTL} delete pod $initPod -n kube-system --force --grace-period 0 || exit_cse {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}} $GET_KUBELET_LOGS
for initPod in $(${KUBECTL} get pod ${kam_pod} -n kube-system -o json | jq -r '.items[] | select(.spec.containers[0].env[] | select(.value=="/etc/kubernetes/addons/init")) | select(.status.phase=="Running") .metadata.name'); do
retrycmd 120 5 30 ${KUBECTL} delete pod ${kam_pod} -n kube-system --force --grace-period 0 || exit_cse {{GetCSEErrorCode "ERR_ADDONS_START_FAIL"}} $GET_KUBELET_LOGS
done
{{if HasCiliumNetworkPolicy}}
while [ ! -f /etc/cni/net.d/05-cilium.conf ]; do
Expand Down
15 changes: 11 additions & 4 deletions pkg/engine/templates_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 9 additions & 8 deletions test/e2e/kubernetes/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ var _ = AfterSuite(func() {
fmt.Println(string(stdout))
}
if cfg.DebugAfterSuite {
cmd := exec.Command("k", "get", "deployments,pods,svc,daemonsets,configmaps,endpoints,jobs,clusterroles,clusterrolebindings,roles,rolebindings,storageclasses,podsecuritypolicy", "--all-namespaces", "-o", "wide")
cmd := exec.Command("k", "get", "deployments,pods,svc,daemonsets,configmaps,endpoints,jobs,clusterroles,clusterrolebindings,roles,rolebindings,storageclasses", "--all-namespaces", "-o", "wide")
out, err := cmd.CombinedOutput()
log.Printf("%s\n", out)
if err != nil {
Expand Down Expand Up @@ -729,12 +729,13 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
})

It("should have core kube-system componentry running", func() {
coreComponents := []string{
common.AddonManagerComponentName,
common.APIServerComponentName,
common.ControllerManagerComponentName,
common.KubeProxyAddonName,
common.SchedulerComponentName,
coreComponents := []string{common.KubeProxyAddonName}
masterPrefix := eng.ExpandedDefinition.Properties.GetMasterVMPrefix()
for i := 0; i < eng.ExpandedDefinition.Properties.MasterProfile.Count; i++ {
coreComponents = append(coreComponents, fmt.Sprintf("%s-%s%d", common.AddonManagerComponentName, masterPrefix, i))
coreComponents = append(coreComponents, fmt.Sprintf("%s-%s%d", common.APIServerComponentName, masterPrefix, i))
coreComponents = append(coreComponents, fmt.Sprintf("%s-%s%d", common.ControllerManagerComponentName, masterPrefix, i))
coreComponents = append(coreComponents, fmt.Sprintf("%s-%s%d", common.SchedulerComponentName, masterPrefix, i))
}
if to.Bool(eng.ExpandedDefinition.Properties.OrchestratorProfile.KubernetesConfig.UseCloudControllerManager) {
coreComponents = append(coreComponents, common.CloudControllerManagerComponentName)
Expand Down Expand Up @@ -861,7 +862,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
})

It("should print cluster resources", func() {
cmd := exec.Command("k", "get", "deployments,pods,svc,daemonsets,configmaps,endpoints,jobs,clusterroles,clusterrolebindings,roles,rolebindings,storageclasses,podsecuritypolicy", "--all-namespaces", "-o", "wide")
cmd := exec.Command("k", "get", "deployments,pods,svc,daemonsets,configmaps,endpoints,jobs,clusterroles,clusterrolebindings,roles,rolebindings,storageclasses", "--all-namespaces", "-o", "wide")
out, err := cmd.CombinedOutput()
log.Printf("%s\n", out)
if err != nil {
Expand Down

0 comments on commit 03a2c5e

Please sign in to comment.