MaxUnavailable and powered off hosts comparison #1947

Merged: 1 commit, Mar 8, 2020
10 changes: 10 additions & 0 deletions cluster/cluster.go
@@ -166,13 +166,18 @@ func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernete
 	inactiveHosts := make(map[string]bool)
 	var controlPlaneHosts, notReadyHosts []*hosts.Host
 	var notReadyHostNames []string
+	var err error
 
 	for _, host := range c.InactiveHosts {
 		// include only hosts with controlplane role
 		if host.IsControl && !c.HostsLabeledToIgnoreUpgrade[host.Address] {
 			inactiveHosts[host.HostnameOverride] = true
 		}
 	}
+	c.MaxUnavailableForControlNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForControlNodes, len(inactiveHosts), services.ControlRole)
+	if err != nil {
+		return "", err
+	}
 	for _, host := range c.ControlPlaneHosts {
 		if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
 			controlPlaneHosts = append(controlPlaneHosts, host)
@@ -265,13 +270,18 @@ func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes
 	inactiveHosts := make(map[string]bool)
 	var notReadyHosts []*hosts.Host
 	var notReadyHostNames []string
+	var err error
 
 	for _, host := range c.InactiveHosts {
 		// if host has controlplane role, it already has worker components upgraded
 		if !host.IsControl && !c.HostsLabeledToIgnoreUpgrade[host.Address] {
 			inactiveHosts[host.HostnameOverride] = true
 		}
 	}
+	c.MaxUnavailableForWorkerNodes, err = services.ResetMaxUnavailable(c.MaxUnavailableForWorkerNodes, len(inactiveHosts), services.WorkerRole)
+	if err != nil {
+		return "", err
+	}
 	for _, host := range append(etcdAndWorkerHosts, workerOnlyHosts...) {
 		if c.NewHosts[host.HostnameOverride] {
 			continue
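With this change the powered-off-host check runs once at the cluster level, before either plane is upgraded: the hosts collected into inactiveHosts are counted, and services.ResetMaxUnavailable shrinks the configured maxUnavailable budget by that count, so unreachable machines cannot silently consume the whole unavailability allowance. Below is a minimal sketch of calling it from outside the package, assuming ResetMaxUnavailable subtracts the inactive-host count and errors out when no budget remains (the exact behavior lives in services/node_util.go, truncated further down):

package main

import (
	"fmt"
	"log"

	"github.com/rancher/rke/services"
)

func main() {
	// Hypothetical state: one controlplane host is powered off and was
	// collected into the inactiveHosts set during host registration.
	inactiveHosts := map[string]bool{"node-3": true}

	// The operator configured max_unavailable_controlplane: 3.
	adjusted, err := services.ResetMaxUnavailable(3, len(inactiveHosts), services.ControlRole)
	if err != nil {
		// Assumed failure mode: the powered-off hosts alone would use up
		// the entire unavailability budget, so the upgrade refuses to start.
		log.Fatal(err)
	}
	fmt.Println(adjusted) // expected 2 under the assumption above
}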
42 changes: 41 additions & 1 deletion cluster/defaults.go
@@ -603,7 +603,7 @@ func GetExternalFlags(local, updateOnly, disablePortCheck bool, configDir, clust
 func (c *Cluster) setAddonsDefaults() {
 	c.Ingress.UpdateStrategy = setDaemonsetAddonDefaults(c.Ingress.UpdateStrategy)
 	c.Network.UpdateStrategy = setDaemonsetAddonDefaults(c.Network.UpdateStrategy)
-	c.DNS.UpdateStrategy = setDeploymentAddonDefaults(c.DNS.UpdateStrategy)
+	c.DNS.UpdateStrategy = setDNSDeploymentAddonDefaults(c.DNS.UpdateStrategy, c.DNS.Provider)
 	if c.DNS.LinearAutoscalerParams == nil {
 		c.DNS.LinearAutoscalerParams = &DefaultClusterProportionalAutoscalerLinearParams
 	}
@@ -638,3 +638,43 @@ func setDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy) *apps
 	}
 	return updateStrategy
 }
+
+func setDNSDeploymentAddonDefaults(updateStrategy *appsv1.DeploymentStrategy, dnsProvider string) *appsv1.DeploymentStrategy {
+	var (
+		coreDNSMaxUnavailable, coreDNSMaxSurge = intstr.FromInt(1), intstr.FromInt(0)
+		kubeDNSMaxSurge, kubeDNSMaxUnavailable = intstr.FromString("10%"), intstr.FromInt(0)
+	)
+	if updateStrategy != nil && updateStrategy.Type != appsv1.RollingUpdateDeploymentStrategyType {
+		return updateStrategy
+	}
+	switch dnsProvider {
+	case CoreDNSProvider:
+		if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
+			return &appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxUnavailable: &coreDNSMaxUnavailable,
+					MaxSurge:       &coreDNSMaxSurge,
+				},
+			}
+		}
+		if updateStrategy.RollingUpdate.MaxUnavailable == nil {
+			updateStrategy.RollingUpdate.MaxUnavailable = &coreDNSMaxUnavailable
+		}
+	case KubeDNSProvider:
+		if updateStrategy == nil || updateStrategy.RollingUpdate == nil {
+			return &appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxUnavailable: &kubeDNSMaxUnavailable,
+					MaxSurge:       &kubeDNSMaxSurge,
+				},
+			}
+		}
+		if updateStrategy.RollingUpdate.MaxSurge == nil {
+			updateStrategy.RollingUpdate.MaxSurge = &kubeDNSMaxSurge
+		}
+	}
+
+	return updateStrategy
+}
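The new helper gives each DNS provider its own rolling-update defaults instead of the one-size-fits-all setDeploymentAddonDefaults: CoreDNS replaces pods in place (maxUnavailable 1, maxSurge 0), kube-dns keeps every replica serving and surges up to 10% extra pods (maxUnavailable 0, maxSurge 10%), and any user-supplied non-RollingUpdate strategy passes through untouched. A test-style sketch of that behavior, assuming it sits in package cluster next to defaults.go (the test itself is not part of the PR):

package cluster

import (
	"testing"

	appsv1 "k8s.io/api/apps/v1"
)

func TestDNSDeploymentDefaultsSketch(t *testing.T) {
	// CoreDNS: replace pods in place, one at a time.
	core := setDNSDeploymentAddonDefaults(nil, CoreDNSProvider)
	if core.RollingUpdate.MaxUnavailable.IntValue() != 1 || core.RollingUpdate.MaxSurge.IntValue() != 0 {
		t.Fatal("unexpected CoreDNS defaults")
	}

	// kube-dns: never drop below the replica count; surge up to 10% extra pods.
	kube := setDNSDeploymentAddonDefaults(nil, KubeDNSProvider)
	if kube.RollingUpdate.MaxUnavailable.IntValue() != 0 || kube.RollingUpdate.MaxSurge.String() != "10%" {
		t.Fatal("unexpected kube-dns defaults")
	}

	// A user-supplied non-RollingUpdate strategy is passed through untouched.
	recreate := &appsv1.DeploymentStrategy{Type: appsv1.RecreateDeploymentStrategyType}
	if got := setDNSDeploymentAddonDefaults(recreate, CoreDNSProvider); got != recreate {
		t.Fatal("expected pass-through for non-RollingUpdate strategies")
	}
}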
20 changes: 13 additions & 7 deletions services/controlplane.go
@@ -72,16 +72,22 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
 		drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
 		log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
 	}
-	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), ControlRole)
-	if err != nil {
-		return errMsgMaxUnavailableNotFailed, err
+	var inactiveHostErr error
+	if len(inactiveHosts) > 0 {
+		var inactiveHostNames []string
+		for hostName := range inactiveHosts {
+			inactiveHostNames = append(inactiveHostNames, hostName)
+		}
+		inactiveHostErr = fmt.Errorf("provisioning incomplete, host(s) [%s] skipped because they could not be contacted", strings.Join(inactiveHostNames, ","))
 	}
 	hostsFailedToUpgrade, err := processControlPlaneForUpgrade(ctx, kubeClient, controlHosts, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap,
 		upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
-	if err != nil {
-		logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
-		errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
-		return errMsgMaxUnavailableNotFailed, err
+	if err != nil || inactiveHostErr != nil {
+		if len(hostsFailedToUpgrade) > 0 {
+			logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
+			errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
+		}
+		return errMsgMaxUnavailableNotFailed, util.ErrList([]error{err, inactiveHostErr})
 	}
 	log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
 	return errMsgMaxUnavailableNotFailed, nil
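The behavioral change here: skipped powered-off hosts are now surfaced as an error in their own right, folded together with any upgrade failure via util.ErrList, so an rke up that had to skip unreachable controlplane hosts no longer reports success. A small self-contained sketch of the assumed util.ErrList semantics (drop nils, aggregate the rest, nil when empty); the real helper lives in rke's util package and may differ in formatting:

package main

import (
	"errors"
	"fmt"
	"strings"
)

// errList mimics what util.ErrList is assumed to do in the hunk above:
// discard nil entries and fold the remainder into a single error.
func errList(errs []error) error {
	var msgs []string
	for _, e := range errs {
		if e != nil {
			msgs = append(msgs, e.Error())
		}
	}
	if len(msgs) == 0 {
		return nil
	}
	return errors.New(strings.Join(msgs, ","))
}

func main() {
	var upgradeErr error // every reachable host upgraded cleanly
	inactiveHostErr := fmt.Errorf("provisioning incomplete, host(s) [node-3] skipped because they could not be contacted")

	// Even with no upgrade failure, the skipped host still fails the run.
	fmt.Println(errList([]error{upgradeErr, inactiveHostErr}))
}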
2 changes: 1 addition & 1 deletion services/node_util.go
@@ -115,7 +115,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string
 	return maxUnavailable, nil
 }
 
-func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
+func ResetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
 	if maxUnavailable > WorkerThreads {
 		/* upgrading a large number of nodes in parallel leads to a large number of goroutines, which has led to errors regarding too many open sockets
 		Because of this RKE switched to using workerpools. 50 workerthreads has been sufficient to optimize rke up, upgrading at most 50 nodes in parallel.
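The only change in this file is visibility: resetMaxUnavailable becomes ResetMaxUnavailable so the cluster package can call it before the per-plane upgrade functions run. The diff truncates the body; below is a plausible reconstruction from the visible clamp and from how the callers treat the result (assumptions: WorkerThreads is 50 and the inactive-host count is deducted from the budget; the real messages and logging will differ):

package services

import "fmt"

const workerThreadsSketch = 50 // stand-in for the real WorkerThreads constant

// resetMaxUnavailableSketch is a hypothetical reconstruction, not the PR's code.
func resetMaxUnavailableSketch(maxUnavailable, lenInactiveHosts int, component string) (int, error) {
	if maxUnavailable > workerThreadsSketch {
		// Upgrading more nodes in parallel than there are worker threads gains
		// nothing, so the budget is capped at the workerpool size.
		maxUnavailable = workerThreadsSketch
	}
	if lenInactiveHosts > 0 {
		// Powered-off hosts already count against unavailability, so they are
		// deducted before any live node is taken offline.
		maxUnavailable -= lenInactiveHosts
		if maxUnavailable <= 0 {
			return 0, fmt.Errorf("cannot upgrade %s components: %d host(s) are powered off or unreachable, which meets or exceeds maxUnavailable", component, lenInactiveHosts)
		}
	}
	return maxUnavailable, nil
}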
4 changes: 0 additions & 4 deletions services/workerplane.go
@@ -55,10 +55,6 @@ func RunWorkerPlane(ctx context.Context, allHosts []*hosts.Host, localConnDialer
 func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *kubernetes.Clientset, mixedRolesHosts []*hosts.Host, workerOnlyHosts []*hosts.Host, inactiveHosts map[string]bool, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, workerNodePlanMap map[string]v3.RKEConfigNodePlan, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage string, upgradeStrategy *v3.NodeUpgradeStrategy, newHosts map[string]bool, maxUnavailable int) (string, error) {
 	log.Infof(ctx, "[%s] Upgrading Worker Plane..", WorkerRole)
 	var errMsgMaxUnavailableNotFailed string
-	maxUnavailable, err := resetMaxUnavailable(maxUnavailable, len(inactiveHosts), WorkerRole)
-	if err != nil {
-		return errMsgMaxUnavailableNotFailed, err
-	}
 	updateNewHostsList(kubeClient, append(mixedRolesHosts, workerOnlyHosts...), newHosts)
 	if len(mixedRolesHosts) > 0 {
 		log.Infof(ctx, "First checking and processing worker components for upgrades on nodes with etcd role one at a time")
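Note: the resetMaxUnavailable call deleted here is not lost. The same adjustment now runs earlier, in cluster.UpgradeWorkerPlane (see cluster/cluster.go above), so maxUnavailable arrives at this function already reconciled against powered-off hosts.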