From 1c1e812af77850a9a42da67d3b6a5afdd0dd4562 Mon Sep 17 00:00:00 2001
From: dmitsh
Date: Wed, 14 Mar 2018 11:38:30 -0700
Subject: [PATCH] Deregister node in api server during cordon-and-drain (#2437)

* Deregister node in api server (optionally) during cordon-and-drain

* addressed comments

* added warning message
---
 pkg/armhelpers/interfaces.go                  |  2 ++
 pkg/armhelpers/kubeclient.go                  |  5 +++
 pkg/armhelpers/mockclients.go                 |  9 +++++
 pkg/operations/cordondrainvm.go               |  5 +++
 .../kubernetesupgrade/upgradeagentnode.go     | 34 +++++++++++++------
 5 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/pkg/armhelpers/interfaces.go b/pkg/armhelpers/interfaces.go
index 7e927aefd2..92074a9d8c 100644
--- a/pkg/armhelpers/interfaces.go
+++ b/pkg/armhelpers/interfaces.go
@@ -102,6 +102,8 @@ type KubernetesClient interface {
 	GetNode(name string) (*v1.Node, error)
 	//UpdateNode updates the node in the api server with the passed in info
 	UpdateNode(node *v1.Node) (*v1.Node, error)
+	//DeleteNode deregisters the named node from the api server
+	DeleteNode(name string) error
 	//SupportEviction queries the api server to discover if it supports eviction, and returns supported type if it is supported
 	SupportEviction() (string, error)
 	//DeletePod deletes the passed in pod
diff --git a/pkg/armhelpers/kubeclient.go b/pkg/armhelpers/kubeclient.go
index a95e3fa8d9..3cd85b8fa2 100644
--- a/pkg/armhelpers/kubeclient.go
+++ b/pkg/armhelpers/kubeclient.go
@@ -56,6 +56,11 @@ func (c *KubernetesClientSetClient) UpdateNode(node *v1.Node) (*v1.Node, error)
 	return c.clientset.Nodes().Update(node)
 }
 
+//DeleteNode deregisters the named node from the api server using default delete options
+func (c *KubernetesClientSetClient) DeleteNode(name string) error {
+	return c.clientset.Nodes().Delete(name, &metav1.DeleteOptions{})
+}
+
 //SupportEviction queries the api server to discover if it supports eviction, and returns supported type if it is supported
 func (c *KubernetesClientSetClient) SupportEviction() (string, error) {
 	discoveryClient := c.clientset.Discovery()
diff --git a/pkg/armhelpers/mockclients.go b/pkg/armhelpers/mockclients.go
index 5e09ec0572..d7dc5de514 100644
--- a/pkg/armhelpers/mockclients.go
+++ b/pkg/armhelpers/mockclients.go
@@ -44,6 +44,7 @@ type MockKubernetesClient struct {
 	FailGetNode           bool
 	UpdateNodeFunc        func(*v1.Node) (*v1.Node, error)
 	FailUpdateNode        bool
+	FailDeleteNode        bool
 	FailSupportEviction   bool
 	FailDeletePod         bool
 	FailEvictPod          bool
@@ -84,6 +85,14 @@ func (mkc *MockKubernetesClient) UpdateNode(node *v1.Node) (*v1.Node, error) {
 	return node, nil
 }
 
+//DeleteNode simulates node deregistration; it fails only when FailDeleteNode is set
+func (mkc *MockKubernetesClient) DeleteNode(name string) error {
+	if mkc.FailDeleteNode {
+		return fmt.Errorf("DeleteNode failed")
+	}
+	return nil
+}
+
 //SupportEviction queries the api server to discover if it supports eviction, and returns supported type if it is supported
 func (mkc *MockKubernetesClient) SupportEviction() (string, error) {
 	if mkc.FailSupportEviction {
diff --git a/pkg/operations/cordondrainvm.go b/pkg/operations/cordondrainvm.go
index 3fc5ed65b9..e3b02f13e6 100644
--- a/pkg/operations/cordondrainvm.go
+++ b/pkg/operations/cordondrainvm.go
@@ -37,9 +37,14 @@ func SafelyDrainNode(az armhelpers.ACSEngineClient, logger *log.Entry, masterURL
 	if err != nil {
 		return err
 	}
+	return SafelyDrainNodeWithClient(client, logger, nodeName, timeout)
+}
 
+// SafelyDrainNodeWithClient cordons and safely drains a node, using the supplied client, so that it can be deleted from the cluster
+func SafelyDrainNodeWithClient(client armhelpers.KubernetesClient, logger *log.Entry, nodeName string, timeout time.Duration) error {
 	//Mark the node unschedulable
 	var node *v1.Node
+	var err error
 	for i := 0; i < cordonMaxRetries; i++ {
 		node, err = client.GetNode(nodeName)
 		if err != nil {
diff --git a/pkg/operations/kubernetesupgrade/upgradeagentnode.go b/pkg/operations/kubernetesupgrade/upgradeagentnode.go
index e2cfbc30f3..2f822613b9 100644
--- a/pkg/operations/kubernetesupgrade/upgradeagentnode.go
+++ b/pkg/operations/kubernetesupgrade/upgradeagentnode.go
@@ -5,13 +5,14 @@ import (
 	"math/rand"
 	"time"
 
-	"k8s.io/client-go/pkg/api/v1/node"
-
 	"github.com/Azure/acs-engine/pkg/api"
 	"github.com/Azure/acs-engine/pkg/armhelpers"
 	"github.com/Azure/acs-engine/pkg/i18n"
 	"github.com/Azure/acs-engine/pkg/operations"
 	"github.com/sirupsen/logrus"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/pkg/api/v1/node"
 )
 
 const (
@@ -40,24 +41,37 @@ type UpgradeAgentNode struct {
 // the node
 // The 'drain' flag is used to invoke 'cordon and drain' flow.
 func (kan *UpgradeAgentNode) DeleteNode(vmName *string, drain bool) error {
-	if drain {
-		var kubeAPIServerURL string
+	var kubeAPIServerURL string
 
-		if kan.UpgradeContainerService.Properties.HostedMasterProfile != nil {
-			kubeAPIServerURL = kan.UpgradeContainerService.Properties.HostedMasterProfile.FQDN
-		} else {
-			kubeAPIServerURL = kan.UpgradeContainerService.Properties.MasterProfile.FQDN
-		}
+	if kan.UpgradeContainerService.Properties.HostedMasterProfile != nil {
+		kubeAPIServerURL = kan.UpgradeContainerService.Properties.HostedMasterProfile.FQDN
+	} else {
+		kubeAPIServerURL = kan.UpgradeContainerService.Properties.MasterProfile.FQDN
+	}
 
-		err := operations.SafelyDrainNode(kan.Client, kan.logger, kubeAPIServerURL, kan.kubeConfig, *vmName, time.Minute)
+	client, err := kan.Client.GetKubernetesClient(kubeAPIServerURL, kan.kubeConfig, interval, kan.timeout)
+	if err != nil {
+		return err
+	}
+	// Cordon and drain the node
+	if drain {
+		err := operations.SafelyDrainNodeWithClient(client, kan.logger, *vmName, time.Minute)
 		if err != nil {
 			kan.logger.Warningf("Error draining agent VM %s. Proceeding with deletion. Error: %v", *vmName, err)
 			// Proceed with deletion anyways
 		}
 	}
+	// Delete VM in ARM
 	if err := operations.CleanDeleteVirtualMachine(kan.Client, kan.logger, kan.ResourceGroup, *vmName); err != nil {
 		return err
 	}
+	// Deregister the node in the api server; NotFound means it is already gone, any other error only warrants a warning
+	if err = client.DeleteNode(*vmName); err != nil {
+		statusErr, ok := err.(*errors.StatusError)
+		if !ok || statusErr.ErrStatus.Reason != v1.StatusReasonNotFound {
+			kan.logger.Warnf("Node %s got an error while deregistering: %#v", *vmName, err)
+		}
+	}
 	return nil
 }
 