From 3255c3a45d760e96641bafec355a0b07fc7029c5 Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Wed, 29 Nov 2017 20:51:30 -0800 Subject: [PATCH 1/7] fixing agent pool upgrade logic --- pkg/operations/kubernetesupgrade/upgrader.go | 116 +++++++++++-------- 1 file changed, 70 insertions(+), 46 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgrader.go b/pkg/operations/kubernetesupgrade/upgrader.go index 7343df63fb..d144ec2afa 100644 --- a/pkg/operations/kubernetesupgrade/upgrader.go +++ b/pkg/operations/kubernetesupgrade/upgrader.go @@ -56,14 +56,14 @@ func (ku *Upgrader) upgradeMasterNodes() error { if ku.ClusterTopology.DataModel.Properties.MasterProfile == nil { return nil } - ku.logger.Infof("Master nodes StorageProfile: %s\n", ku.ClusterTopology.DataModel.Properties.MasterProfile.StorageProfile) + ku.logger.Infof("Master nodes StorageProfile: %s", ku.ClusterTopology.DataModel.Properties.MasterProfile.StorageProfile) // Upgrade Master VMs templateMap, parametersMap, err := ku.generateUpgradeTemplate(ku.ClusterTopology.DataModel) if err != nil { return ku.Translator.Errorf("error generating upgrade template: %s", err.Error()) } - ku.logger.Infof("Prepping master nodes for upgrade...\n") + ku.logger.Infof("Prepping master nodes for upgrade...") transformer := &acsengine.Transformer{ Translator: ku.Translator, @@ -88,14 +88,14 @@ func (ku *Upgrader) upgradeMasterNodes() error { mastersUpgradedCount := len(*ku.ClusterTopology.UpgradedMasterVMs) mastersToUgradeCount := expectedMasterCount - mastersUpgradedCount - ku.logger.Infof("Total expected master count: %d\n", expectedMasterCount) - ku.logger.Infof("Master nodes that need to be upgraded: %d\n", mastersToUgradeCount) - ku.logger.Infof("Master nodes that have been upgraded: %d\n", mastersUpgradedCount) + ku.logger.Infof("Total expected master count: %d", expectedMasterCount) + ku.logger.Infof("Master nodes that need to be upgraded: %d", mastersToUgradeCount) + ku.logger.Infof("Master nodes that have been upgraded: %d", mastersUpgradedCount) - ku.logger.Infof("Starting upgrade of master nodes...\n") + ku.logger.Infof("Starting upgrade of master nodes...") masterNodesInCluster := len(*ku.ClusterTopology.MasterVMs) + mastersUpgradedCount - ku.logger.Infof("masterNodesInCluster: %d\n", masterNodesInCluster) + ku.logger.Infof("masterNodesInCluster: %d", masterNodesInCluster) if masterNodesInCluster > expectedMasterCount { return ku.Translator.Errorf("Total count of master VMs: %d exceeded expected count: %d", masterNodesInCluster, expectedMasterCount) } @@ -103,31 +103,31 @@ func (ku *Upgrader) upgradeMasterNodes() error { upgradedMastersIndex := make(map[int]bool) for _, vm := range *ku.ClusterTopology.UpgradedMasterVMs { - ku.logger.Infof("Master VM: %s is upgraded to expected orchestrator version\n", *vm.Name) + ku.logger.Infof("Master VM: %s is upgraded to expected orchestrator version", *vm.Name) masterIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) upgradedMastersIndex[masterIndex] = true } for _, vm := range *ku.ClusterTopology.MasterVMs { - ku.logger.Infof("Upgrading Master VM: %s\n", *vm.Name) + ku.logger.Infof("Upgrading Master VM: %s", *vm.Name) masterIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) err := upgradeMasterNode.DeleteNode(vm.Name, false) if err != nil { - ku.logger.Infof("Error deleting master VM: %s, err: %v\n", *vm.Name, err) + ku.logger.Infof("Error deleting master VM: %s, err: %v", *vm.Name, err) return err } err = 
upgradeMasterNode.CreateNode("master", masterIndex) if err != nil { - ku.logger.Infof("Error creating upgraded master VM: %s\n", *vm.Name) + ku.logger.Infof("Error creating upgraded master VM: %s", *vm.Name) return err } err = upgradeMasterNode.Validate(vm.Name) if err != nil { - ku.logger.Infof("Error validating upgraded master VM: %s\n", *vm.Name) + ku.logger.Infof("Error validating upgraded master VM: %s", *vm.Name) return err } @@ -138,11 +138,11 @@ func (ku *Upgrader) upgradeMasterNodes() error { // VM upgrade when a master VM was deleted but creation of upgraded master did not run. if masterNodesInCluster < expectedMasterCount { ku.logger.Infof( - "Found missing master VMs in the cluster. Reconstructing names of missing master VMs for recreation during upgrade...\n") + "Found missing master VMs in the cluster. Reconstructing names of missing master VMs for recreation during upgrade...") } mastersToCreate := expectedMasterCount - masterNodesInCluster - ku.logger.Infof("Expected master count: %d, Creating %d more master VMs\n", expectedMasterCount, mastersToCreate) + ku.logger.Infof("Expected master count: %d, Creating %d more master VMs", expectedMasterCount, mastersToCreate) // NOTE: this is NOT completely idempotent because it assumes that // the OS disk has been deleted @@ -152,18 +152,18 @@ func (ku *Upgrader) upgradeMasterNodes() error { masterIndexToCreate++ } - ku.logger.Infof("Creating upgraded master VM with index: %d\n", masterIndexToCreate) + ku.logger.Infof("Creating upgraded master VM with index: %d", masterIndexToCreate) err = upgradeMasterNode.CreateNode("master", masterIndexToCreate) if err != nil { - ku.logger.Infof("Error creating upgraded master VM with index: %d\n", masterIndexToCreate) + ku.logger.Infof("Error creating upgraded master VM with index: %d", masterIndexToCreate) return err } tempVMName := "" err = upgradeMasterNode.Validate(&tempVMName) if err != nil { - ku.logger.Infof("Error validating upgraded master VM with index: %d\n", masterIndexToCreate) + ku.logger.Infof("Error validating upgraded master VM with index: %d", masterIndexToCreate) return err } @@ -182,7 +182,7 @@ func (ku *Upgrader) upgradeAgentPools() error { return ku.Translator.Errorf("error generating upgrade template: %s", err.Error()) } - ku.logger.Infof("Prepping agent pool: %s for upgrade...\n", *agentPool.Name) + ku.logger.Infof("Prepping agent pool '%s' for upgrade...", *agentPool.Name) preservePools := map[string]bool{*agentPool.Name: true} transformer := &acsengine.Transformer{ @@ -210,6 +210,11 @@ func (ku *Upgrader) upgradeAgentPools() error { } } + if agentCount == 0 { + ku.logger.Infof("Agent pool '%s' is empty", *agentPool.Name) + return nil + } + upgradeAgentNode := UpgradeAgentNode{ Translator: ku.Translator, logger: ku.logger, @@ -222,94 +227,113 @@ func (ku *Upgrader) upgradeAgentPools() error { upgradeAgentNode.kubeConfig = ku.kubeConfig upgradeVMs := make(map[int]*UpgradeVM) - - // Go over upgraded VMs and verify provisioning state. Delete the VM if the state is not 'Succeeded'. + // Go over upgraded VMs and verify provisioning state. Delete VMs in 'bad' state. // Such a VM will be re-created later in this function. 
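
Both the master loop above and the agent-pool loop the diff continues with below repeat the same replace-in-place step for each VM: delete the old machine, recreate it at the same index from the upgrade template, then validate the new node. A minimal, self-contained sketch of that shared shape, using only the method signatures visible in this diff; the interface, the stub, and the name of the boolean DeleteNode argument ("drain") are illustrative guesses, not code from the repository:

```go
package main

import "fmt"

// nodeUpgrader lists the three calls both upgrade loops in this patch rely on.
// The method set mirrors the calls visible in the diff; the interface itself
// does not exist in the repository.
type nodeUpgrader interface {
	DeleteNode(vmName *string, drain bool) error // second argument's meaning is a guess; the diff passes false for masters, true for agents
	CreateNode(poolName string, agentNo int) error
	Validate(vmName *string) error
}

// replaceNode is the shared delete -> create -> validate sequence applied to a
// single VM; any failure aborts the upgrade, matching the early returns above.
func replaceNode(u nodeUpgrader, poolName, vmName string, index int, drain bool) error {
	if err := u.DeleteNode(&vmName, drain); err != nil {
		return fmt.Errorf("deleting %s: %v", vmName, err)
	}
	if err := u.CreateNode(poolName, index); err != nil {
		return fmt.Errorf("recreating %s at index %d: %v", vmName, index, err)
	}
	return u.Validate(&vmName)
}

// loggingUpgrader is a stand-in implementation so the sketch runs on its own.
type loggingUpgrader struct{}

func (loggingUpgrader) DeleteNode(vmName *string, drain bool) error {
	fmt.Printf("delete %s (drain=%v)\n", *vmName, drain)
	return nil
}
func (loggingUpgrader) CreateNode(poolName string, agentNo int) error {
	fmt.Printf("create node %d in pool %s\n", agentNo, poolName)
	return nil
}
func (loggingUpgrader) Validate(vmName *string) error {
	fmt.Printf("validate %s\n", *vmName)
	return nil
}

func main() {
	// Hypothetical pool and VM names, purely for the demo run.
	if err := replaceNode(loggingUpgrader{}, "agentpool1", "k8s-agentpool1-0", 0, true); err != nil {
		fmt.Println("upgrade step failed:", err)
	}
}
```
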
for _, vm := range *agentPool.UpgradedAgentVMs { - ku.logger.Infof("Agent VM: %s, pool name: %s on expected orchestrator version\n", *vm.Name, *agentPool.Name) - var vmProvisioningState string + ku.logger.Infof("Agent VM: %s, pool name: %s on expected orchestrator version", *vm.Name, *agentPool.Name) + vmProvisioningState := "Invalid" if vm.VirtualMachineProperties != nil && vm.VirtualMachineProperties.ProvisioningState != nil { vmProvisioningState = *vm.VirtualMachineProperties.ProvisioningState } - if vmProvisioningState != string(api.Succeeded) { - ku.logger.Infof("Deleting agent VM %s in provisioning state %s\n", *vm.Name, vmProvisioningState) + switch vmProvisioningState { + case "Succeeded": + fallthrough + case "Creating": + agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) + upgradeVMs[agentIndex] = &UpgradeVM{*vm.Name, true} + + case "Failed": + fallthrough + case "Invalid": + ku.logger.Infof("Deleting agent VM %s in provisioning state %s", *vm.Name, vmProvisioningState) err := upgradeAgentNode.DeleteNode(vm.Name, false) if err != nil { - ku.logger.Errorf("Error deleting agent VM %s: %v\n", *vm.Name, err) - return err + ku.logger.Warnf("Error deleting agent VM %s: %v", *vm.Name, err) } - } else { - agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) - upgradeVMs[agentIndex] = &UpgradeVM{*vm.Name, true} + + case "Cancelled": + fallthrough + case "Deleting": + fallthrough + default: + ku.logger.Infof("Ignoring agent VM %s in provisioning state %s", *vm.Name, vmProvisioningState) } } - ku.logger.Infof("Starting upgrade of agent nodes in pool identifier: %s, name: %s...\n", - *agentPool.Identifier, *agentPool.Name) - for _, vm := range *agentPool.AgentVMs { agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) upgradeVMs[agentIndex] = &UpgradeVM{*vm.Name, false} } + toBeUpgradedCount := len(*agentPool.AgentVMs) - // Create missing nodes *plus* one extra node, which will be used to take on the load from upgrading nodes. - // In a normal mode of operation the actual number of VMs in the pool [len(upgradeVMs)] is equal to agentCount. - // However, if the upgrade failed in the middle, the actual number of VMs might differ. - for agentCount > 0 && len(upgradeVMs) <= agentCount { + ku.logger.Infof("Starting upgrade of %d agent nodes (out of %d) in pool identifier: %s, name: %s...", + toBeUpgradedCount, agentCount, *agentPool.Identifier, *agentPool.Name) + + // Create missing nodes to match agentCount. This could be due to previous upgrade failure + // If there are nodes that need to be upgraded, create one extra node, which will be used to take on the load from upgrading nodes. 
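
The counting rule described in the comment above (the diff resumes with the corresponding code right after this note) is small enough to restate on its own: whenever at least one old node still needs upgrading, the target pool size is bumped by one so an extra node can absorb drained workloads, and the last old node is later deleted without a replacement, bringing the pool back to agentCount. A self-contained sketch of that arithmetic, working only from the counts; the function is illustrative, not code from the repository:

```go
package main

import "fmt"

// surgePlan restates the pool-size bookkeeping used in this patch: create
// enough VMs to reach agentCount, plus one surge VM whenever at least one old
// node must be upgraded; the final old node is then removed without being
// recreated, returning the pool to agentCount. Illustrative only.
func surgePlan(agentCount, upgradedCount, toBeUpgradedCount int) (toCreate int, skipLastRecreate bool) {
	target := agentCount
	if toBeUpgradedCount > 0 {
		target++ // one extra node takes the load while old nodes are replaced
	}
	existing := upgradedCount + toBeUpgradedCount
	if existing < target {
		toCreate = target - existing
	}
	// The surge VM already exists once the creations above finish, so the last
	// old node is deleted but not recreated.
	return toCreate, toBeUpgradedCount > 0
}

func main() {
	// Example: a 3-node pool where 1 VM already runs the new version and 2
	// still need upgrading -> create 1 surge VM, skip recreating the last one.
	toCreate, skipLast := surgePlan(3, 1, 2)
	fmt.Println("VMs to create up front:", toCreate, "skip last recreate:", skipLast)
}
```
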
+ if toBeUpgradedCount > 0 { + agentCount++ + } + for len(upgradeVMs) < agentCount { agentIndex := getAvailableIndex(upgradeVMs) vmName, err := armhelpers.GetK8sVMName(agentOsType, ku.DataModel.Properties.HostedMasterProfile != nil, ku.NameSuffix, agentPoolName, agentPoolIndex, agentIndex) if err != nil { - ku.logger.Errorf("Error reconstructing agent VM name with index %d: %v\n", agentIndex, err) + ku.logger.Errorf("Error reconstructing agent VM name with index %d: %v", agentIndex, err) return err } - ku.logger.Infof("Adding agent node %s (index %d)", vmName, agentIndex) + ku.logger.Infof("Creating new agent node %s (index %d)", vmName, agentIndex) err = upgradeAgentNode.CreateNode(*agentPool.Name, agentIndex) if err != nil { - ku.logger.Errorf("Error creating agent node %s (index %d): %v\n", vmName, agentIndex, err) + ku.logger.Errorf("Error creating agent node %s (index %d): %v", vmName, agentIndex, err) return err } err = upgradeAgentNode.Validate(&vmName) if err != nil { - ku.logger.Infof("Error validating agent node %s (index %d): %v\n", vmName, agentIndex, err) + ku.logger.Infof("Error validating agent node %s (index %d): %v", vmName, agentIndex, err) return err } upgradeVMs[agentIndex] = &UpgradeVM{vmName, true} } + if toBeUpgradedCount == 0 { + ku.logger.Infof("No nodes to upgrade") + return nil + } + // Upgrade nodes in agent pool - upgradedCount, toBeUpgraded := 0, len(*agentPool.AgentVMs) + upgradedCount := 0 for agentIndex, vm := range upgradeVMs { if vm.Upgraded { continue } - ku.logger.Infof("Upgrading Agent VM: %s, pool name: %s\n", vm.Name, *agentPool.Name) + ku.logger.Infof("Upgrading Agent VM: %s, pool name: %s", vm.Name, *agentPool.Name) err := upgradeAgentNode.DeleteNode(&vm.Name, true) if err != nil { - ku.logger.Errorf("Error deleting agent VM %s: %v\n", vm.Name, err) + ku.logger.Errorf("Error deleting agent VM %s: %v", vm.Name, err) return err } // do not create last node in favor of already created extra node. 
- if upgradedCount == toBeUpgraded-1 { + if upgradedCount == toBeUpgradedCount-1 { ku.logger.Infof("Skipping creation of VM %s (index %d)", vm.Name, agentIndex) delete(upgradeVMs, agentIndex) } else { err = upgradeAgentNode.CreateNode(*agentPool.Name, agentIndex) if err != nil { - ku.logger.Errorf("Error creating upgraded agent VM %s: %v\n", vm.Name, err) + ku.logger.Errorf("Error creating upgraded agent VM %s: %v", vm.Name, err) return err } err = upgradeAgentNode.Validate(&vm.Name) if err != nil { - ku.logger.Errorf("Error validating upgraded agent VM %s: %v\n", vm.Name, err) + ku.logger.Errorf("Error validating upgraded agent VM %s: %v", vm.Name, err) return err } vm.Upgraded = true From 80b5c83641d88455d2af039892db917fa47b0860 Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Thu, 30 Nov 2017 09:55:46 -0800 Subject: [PATCH 2/7] refined vm provisioning state --- pkg/operations/kubernetesupgrade/upgrader.go | 84 ++++++++++++-------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgrader.go b/pkg/operations/kubernetesupgrade/upgrader.go index d144ec2afa..10921e54a7 100644 --- a/pkg/operations/kubernetesupgrade/upgrader.go +++ b/pkg/operations/kubernetesupgrade/upgrader.go @@ -19,10 +19,17 @@ type Upgrader struct { kubeConfig string } -// UpgradeVM holds VM name and upgrade status -type UpgradeVM struct { - Name string - Upgraded bool +type vmStatus int + +const ( + vmStatusUpgraded vmStatus = iota + vmStatusNotUpgraded + vmStatusIgnored +) + +type vmInfo struct { + name string + status vmStatus } // Init initializes an upgrader struct @@ -226,44 +233,52 @@ func (ku *Upgrader) upgradeAgentPools() error { upgradeAgentNode.Client = ku.Client upgradeAgentNode.kubeConfig = ku.kubeConfig - upgradeVMs := make(map[int]*UpgradeVM) - // Go over upgraded VMs and verify provisioning state. Delete VMs in 'bad' state. - // Such a VM will be re-created later in this function. + agentVMs := make(map[int]*vmInfo) + // Go over upgraded VMs and verify provisioning state + // per https://docs.microsoft.com/en-us/rest/api/compute/virtualmachines/virtualmachines-state : + // - Creating: Indicates the virtual Machine is being created. + // - Updating: Indicates that there is an update operation in progress on the Virtual Machine. + // - Succeeded: Indicates that the operation executed on the virtual machine succeeded. + // - Deleting: Indicates that the virtual machine is being deleted. + // - Failed: Indicates that the update operation on the Virtual Machine failed. + // Delete VMs in 'bad' state. Such VMs will be re-created later in this function. 
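
Per the Azure provisioning-state list cited above, the switch the diff continues with below sorts each already-upgraded VM into one of the three vmStatus buckets: healthy or converging states count as upgraded, Failed triggers delete-and-recreate via the "create missing nodes" loop, and transitional or unknown states are left alone for this round. Restated as a standalone helper for reference; the helper and the action names are editorial shorthand, while the state strings and the three-way split are the ones this patch uses:

```go
package main

import "fmt"

// stateAction names what the upgrade loop does with an already-upgraded VM in
// a given provisioning state.
type stateAction int

const (
	countAsUpgraded   stateAction = iota // keep it; it already runs the target version
	deleteAndRecreate                    // delete it now; the "create missing nodes" loop rebuilds it
	ignore                               // transitional or unknown; leave it alone this round
)

// classifyProvisioningState mirrors the switch in the hunk below as a pure function.
func classifyProvisioningState(state string) stateAction {
	switch state {
	case "Creating", "Updating", "Succeeded":
		return countAsUpgraded
	case "Failed":
		return deleteAndRecreate
	default: // "Deleting", an empty state, or anything unexpected
		return ignore
	}
}

func main() {
	for _, s := range []string{"Succeeded", "Updating", "Failed", "Deleting", ""} {
		fmt.Printf("state %q -> action %d\n", s, classifyProvisioningState(s))
	}
}
```
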
+ upgradedCount := 0 for _, vm := range *agentPool.UpgradedAgentVMs { ku.logger.Infof("Agent VM: %s, pool name: %s on expected orchestrator version", *vm.Name, *agentPool.Name) - vmProvisioningState := "Invalid" + var vmProvisioningState string if vm.VirtualMachineProperties != nil && vm.VirtualMachineProperties.ProvisioningState != nil { vmProvisioningState = *vm.VirtualMachineProperties.ProvisioningState } + agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) switch vmProvisioningState { - case "Succeeded": - fallthrough case "Creating": - agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) - upgradeVMs[agentIndex] = &UpgradeVM{*vm.Name, true} + fallthrough + case "Updating": + fallthrough + case "Succeeded": + agentVMs[agentIndex] = &vmInfo{*vm.Name, vmStatusUpgraded} + upgradedCount++ case "Failed": - fallthrough - case "Invalid": ku.logger.Infof("Deleting agent VM %s in provisioning state %s", *vm.Name, vmProvisioningState) err := upgradeAgentNode.DeleteNode(vm.Name, false) if err != nil { - ku.logger.Warnf("Error deleting agent VM %s: %v", *vm.Name, err) + ku.logger.Errorf("Error deleting agent VM %s: %v", *vm.Name, err) + return err } - case "Cancelled": - fallthrough case "Deleting": fallthrough default: ku.logger.Infof("Ignoring agent VM %s in provisioning state %s", *vm.Name, vmProvisioningState) + agentVMs[agentIndex] = &vmInfo{*vm.Name, vmStatusIgnored} } } for _, vm := range *agentPool.AgentVMs { agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) - upgradeVMs[agentIndex] = &UpgradeVM{*vm.Name, false} + agentVMs[agentIndex] = &vmInfo{*vm.Name, vmStatusNotUpgraded} } toBeUpgradedCount := len(*agentPool.AgentVMs) @@ -275,8 +290,8 @@ func (ku *Upgrader) upgradeAgentPools() error { if toBeUpgradedCount > 0 { agentCount++ } - for len(upgradeVMs) < agentCount { - agentIndex := getAvailableIndex(upgradeVMs) + for upgradedCount+toBeUpgradedCount < agentCount { + agentIndex := getAvailableIndex(agentVMs) vmName, err := armhelpers.GetK8sVMName(agentOsType, ku.DataModel.Properties.HostedMasterProfile != nil, ku.NameSuffix, agentPoolName, agentPoolIndex, agentIndex) @@ -298,7 +313,8 @@ func (ku *Upgrader) upgradeAgentPools() error { return err } - upgradeVMs[agentIndex] = &UpgradeVM{vmName, true} + agentVMs[agentIndex] = &vmInfo{vmName, vmStatusUpgraded} + upgradedCount++ } if toBeUpgradedCount == 0 { @@ -307,36 +323,36 @@ func (ku *Upgrader) upgradeAgentPools() error { } // Upgrade nodes in agent pool - upgradedCount := 0 - for agentIndex, vm := range upgradeVMs { - if vm.Upgraded { + upgradedCount = 0 + for agentIndex, vm := range agentVMs { + if vm.status != vmStatusNotUpgraded { continue } - ku.logger.Infof("Upgrading Agent VM: %s, pool name: %s", vm.Name, *agentPool.Name) + ku.logger.Infof("Upgrading Agent VM: %s, pool name: %s", vm.name, *agentPool.Name) - err := upgradeAgentNode.DeleteNode(&vm.Name, true) + err := upgradeAgentNode.DeleteNode(&vm.name, true) if err != nil { - ku.logger.Errorf("Error deleting agent VM %s: %v", vm.Name, err) + ku.logger.Errorf("Error deleting agent VM %s: %v", vm.name, err) return err } // do not create last node in favor of already created extra node. 
if upgradedCount == toBeUpgradedCount-1 { - ku.logger.Infof("Skipping creation of VM %s (index %d)", vm.Name, agentIndex) - delete(upgradeVMs, agentIndex) + ku.logger.Infof("Skipping creation of VM %s (index %d)", vm.name, agentIndex) + delete(agentVMs, agentIndex) } else { err = upgradeAgentNode.CreateNode(*agentPool.Name, agentIndex) if err != nil { - ku.logger.Errorf("Error creating upgraded agent VM %s: %v", vm.Name, err) + ku.logger.Errorf("Error creating upgraded agent VM %s: %v", vm.name, err) return err } - err = upgradeAgentNode.Validate(&vm.Name) + err = upgradeAgentNode.Validate(&vm.name) if err != nil { - ku.logger.Errorf("Error validating upgraded agent VM %s: %v", vm.Name, err) + ku.logger.Errorf("Error validating upgraded agent VM %s: %v", vm.name, err) return err } - vm.Upgraded = true + vm.status = vmStatusUpgraded } upgradedCount++ } @@ -372,7 +388,7 @@ func (ku *Upgrader) generateUpgradeTemplate(upgradeContainerService *api.Contain } // return unused index within the range of agent indices, or subsequent index -func getAvailableIndex(vms map[int]*UpgradeVM) int { +func getAvailableIndex(vms map[int]*vmInfo) int { maxIndex := 0 for indx := range vms { From b6f3d6b6dda230bbb854e45259e9c2dce927614b Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Thu, 30 Nov 2017 14:51:20 -0800 Subject: [PATCH 3/7] addressed comments --- pkg/operations/kubernetesupgrade/upgrader.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgrader.go b/pkg/operations/kubernetesupgrade/upgrader.go index 10921e54a7..8bb9c93da6 100644 --- a/pkg/operations/kubernetesupgrade/upgrader.go +++ b/pkg/operations/kubernetesupgrade/upgrader.go @@ -252,11 +252,7 @@ func (ku *Upgrader) upgradeAgentPools() error { agentIndex, _ := armhelpers.GetVMNameIndex(vm.StorageProfile.OsDisk.OsType, *vm.Name) switch vmProvisioningState { - case "Creating": - fallthrough - case "Updating": - fallthrough - case "Succeeded": + case "Creating", "Updating", "Succeeded": agentVMs[agentIndex] = &vmInfo{*vm.Name, vmStatusUpgraded} upgradedCount++ From 4777fdc1c18027538328ee0319d883339090540e Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Thu, 30 Nov 2017 18:38:12 -0800 Subject: [PATCH 4/7] stop go routine on the return from the host function --- .../kubernetesupgrade/upgradeagentnode.go | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgradeagentnode.go b/pkg/operations/kubernetesupgrade/upgradeagentnode.go index 7135ebac14..ff1ab1c2fa 100644 --- a/pkg/operations/kubernetesupgrade/upgradeagentnode.go +++ b/pkg/operations/kubernetesupgrade/upgradeagentnode.go @@ -3,6 +3,7 @@ package kubernetesupgrade import ( "fmt" "math/rand" + "sync" "time" "k8s.io/client-go/pkg/api/v1/node" @@ -113,8 +114,13 @@ func (kan *UpgradeAgentNode) Validate(vmName *string) error { } ch := make(chan struct{}, 1) - go func() { - for { + var wg sync.WaitGroup + wg.Add(1) + running := true + + go func(running *bool) { + defer wg.Done() + for *running { agentNode, err := client.GetNode(*vmName) if err != nil { kan.logger.Infof("Agent VM: %s status error: %v\n", *vmName, err) @@ -127,15 +133,22 @@ func (kan *UpgradeAgentNode) Validate(vmName *string) error { time.Sleep(time.Second * 5) } } - }() + }(&running) - for { + for running { select { case <-ch: - return nil + err = nil + running = false case <-time.After(timeout): - kan.logger.Errorf("Node was not ready within %v", timeout) - return fmt.Errorf("Node was not ready 
within %v", timeout) + err = fmt.Errorf("Node was not ready within %v", timeout) + running = false } } + + if err != nil { + kan.logger.Errorf(err.Error()) + } + wg.Wait() + return err } From 9e5a54f8b5a43061f1128b81e80312486ba0856a Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Fri, 1 Dec 2017 11:51:53 -0800 Subject: [PATCH 5/7] simplify node validation --- .../kubernetesupgrade/upgradeagentnode.go | 42 ++++++------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgradeagentnode.go b/pkg/operations/kubernetesupgrade/upgradeagentnode.go index ff1ab1c2fa..47385879bf 100644 --- a/pkg/operations/kubernetesupgrade/upgradeagentnode.go +++ b/pkg/operations/kubernetesupgrade/upgradeagentnode.go @@ -3,7 +3,6 @@ package kubernetesupgrade import ( "fmt" "math/rand" - "sync" "time" "k8s.io/client-go/pkg/api/v1/node" @@ -113,42 +112,27 @@ func (kan *UpgradeAgentNode) Validate(vmName *string) error { return err } - ch := make(chan struct{}, 1) - var wg sync.WaitGroup - wg.Add(1) - running := true - - go func(running *bool) { - defer wg.Done() - for *running { + retryTimer := time.NewTimer(time.Millisecond) + timeoutTimer := time.NewTimer(timeout) + for { + select { + case <-timeoutTimer.C: + retryTimer.Stop() + kan.logger.Errorf("Node was not ready within %v", timeout) + return fmt.Errorf("Node was not ready within %v", timeout) + case <-retryTimer.C: agentNode, err := client.GetNode(*vmName) if err != nil { kan.logger.Infof("Agent VM: %s status error: %v\n", *vmName, err) - time.Sleep(time.Second * 5) + retryTimer.Reset(time.Second * 5) } else if node.IsNodeReady(agentNode) { kan.logger.Infof("Agent VM: %s is ready", *vmName) - ch <- struct{}{} + timeoutTimer.Stop() + return nil } else { kan.logger.Infof("Agent VM: %s not ready yet...", *vmName) - time.Sleep(time.Second * 5) + retryTimer.Reset(time.Second * 5) } } - }(&running) - - for running { - select { - case <-ch: - err = nil - running = false - case <-time.After(timeout): - err = fmt.Errorf("Node was not ready within %v", timeout) - running = false - } - } - - if err != nil { - kan.logger.Errorf(err.Error()) } - wg.Wait() - return err } From 12e3b175b11756d90878f76116a9413f6dbab3de Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Fri, 1 Dec 2017 11:58:12 -0800 Subject: [PATCH 6/7] formatting --- pkg/operations/kubernetesupgrade/upgradeagentnode.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgradeagentnode.go b/pkg/operations/kubernetesupgrade/upgradeagentnode.go index 47385879bf..593bb2fcfb 100644 --- a/pkg/operations/kubernetesupgrade/upgradeagentnode.go +++ b/pkg/operations/kubernetesupgrade/upgradeagentnode.go @@ -16,6 +16,7 @@ import ( const ( interval = time.Second * 1 + retry = time.Second * 5 timeout = time.Minute * 10 ) @@ -124,14 +125,14 @@ func (kan *UpgradeAgentNode) Validate(vmName *string) error { agentNode, err := client.GetNode(*vmName) if err != nil { kan.logger.Infof("Agent VM: %s status error: %v\n", *vmName, err) - retryTimer.Reset(time.Second * 5) + retryTimer.Reset(retry) } else if node.IsNodeReady(agentNode) { kan.logger.Infof("Agent VM: %s is ready", *vmName) timeoutTimer.Stop() return nil } else { kan.logger.Infof("Agent VM: %s not ready yet...", *vmName) - retryTimer.Reset(time.Second * 5) + retryTimer.Reset(retry) } } } From 8405ca9ea585f60c947d078b504f651290b311b4 Mon Sep 17 00:00:00 2001 From: Dmitry Shmulevich Date: Fri, 1 Dec 2017 13:39:20 -0800 Subject: [PATCH 7/7] addressed comments 
--- pkg/operations/kubernetesupgrade/upgradeagentnode.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/operations/kubernetesupgrade/upgradeagentnode.go b/pkg/operations/kubernetesupgrade/upgradeagentnode.go index 593bb2fcfb..74910bf55c 100644 --- a/pkg/operations/kubernetesupgrade/upgradeagentnode.go +++ b/pkg/operations/kubernetesupgrade/upgradeagentnode.go @@ -66,7 +66,7 @@ func (kan *UpgradeAgentNode) CreateNode(poolName string, agentNo int) error { poolCountParameter := kan.ParametersMap[poolName+"Count"].(map[string]interface{}) poolCountParameter["value"] = agentNo + 1 agentCount, _ := poolCountParameter["value"] - kan.logger.Infof("Agent pool: %s, set count to: %d temporarily during upgrade. Upgrading agent: %d\n", + kan.logger.Infof("Agent pool: %s, set count to: %d temporarily during upgrade. Upgrading agent: %d", poolName, agentCount, agentNo) poolOffsetVarName := poolName + "Offset" @@ -119,12 +119,13 @@ func (kan *UpgradeAgentNode) Validate(vmName *string) error { select { case <-timeoutTimer.C: retryTimer.Stop() - kan.logger.Errorf("Node was not ready within %v", timeout) - return fmt.Errorf("Node was not ready within %v", timeout) + err := fmt.Errorf("Node was not ready within %v", timeout) + kan.logger.Errorf(err.Error()) + return err case <-retryTimer.C: agentNode, err := client.GetNode(*vmName) if err != nil { - kan.logger.Infof("Agent VM: %s status error: %v\n", *vmName, err) + kan.logger.Infof("Agent VM: %s status error: %v", *vmName, err) retryTimer.Reset(retry) } else if node.IsNodeReady(agentNode) { kan.logger.Infof("Agent VM: %s is ready", *vmName)