diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go index bb578dca6427..9c3b8a5314c8 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "os" + "strings" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -46,6 +47,7 @@ const ( resourceNameMachine = "machines" resourceNameMachineSet = "machinesets" resourceNameMachineDeployment = "machinedeployments" + failedMachinePrefix = "failed-machine-" ) // machineController watches for Nodes, Machines, MachineSets and @@ -221,6 +223,16 @@ func (c *machineController) findMachineByProviderID(providerID normalizedProvide } } + if isFailedMachineProviderID(providerID) { + machine, err := c.findMachine(machineKeyFromFailedProviderID(providerID)) + if err != nil { + return nil, err + } + if machine != nil { + return machine.DeepCopy(), nil + } + } + // If the machine object has no providerID--maybe actuator // does not set this value (e.g., OpenStack)--then first // lookup the node using ProviderID. If that is successful @@ -236,6 +248,15 @@ func (c *machineController) findMachineByProviderID(providerID normalizedProvide return c.findMachine(node.Annotations[machineAnnotationKey]) } +func isFailedMachineProviderID(providerID normalizedProviderID) bool { + return strings.HasPrefix(string(providerID), failedMachinePrefix) +} + +func machineKeyFromFailedProviderID(providerID normalizedProviderID) string { + namespaceName := strings.TrimPrefix(string(providerID), failedMachinePrefix) + return strings.Replace(namespaceName, "_", "/", 1) +} + // findNodeByNodeName finds the Node object keyed by name.. Returns // nil if it cannot be found. A DeepCopy() of the object is returned // on success. @@ -422,6 +443,15 @@ func (c *machineController) machineSetProviderIDs(machineSet *MachineSet) ([]str continue } + if machine.Status.Phase == "Failed" { + klog.V(4).Infof("Status.Phase of machine %q is failed", machine.Name) + // Provide a fake ID that can be recognised later and convered into a machine key. + // Use an underscore as a separator between namespace and name as it is not a + // valid character within a namespace name. + providerIDs = append(providerIDs, fmt.Sprintf("%s%s_%s", failedMachinePrefix, machine.Namespace, machine.Name)) + continue + } + if machine.Status.NodeRef == nil { klog.V(4).Infof("Status.NodeRef of machine %q is currently nil", machine.Name) continue diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go index 3d1c06a7f5e1..776cc8e4122c 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go @@ -472,7 +472,17 @@ func TestControllerFindMachineByProviderID(t *testing.T) { t.Fatalf("expected machines to be equal - expected %+v, got %+v", testConfig.machines[0], machine) } - // Test #2: Verify machine is not found if it has a + // Test #2: Verify machine returned by fake provider ID is correct machine + fakeProviderID := fmt.Sprintf("%s$s/%s", testConfig.machines[0].Namespace, testConfig.machines[0].Name) + machine, err = controller.findMachineByProviderID(normalizedProviderID(fakeProviderID)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if machine != nil { + t.Fatal("expected find to fail") + } + + // Test #3: Verify machine is not found if it has a // non-existent or different provider ID. machine = testConfig.machines[0].DeepCopy() machine.Spec.ProviderID = pointer.StringPtr("does-not-match") diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_converters.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_converters.go index 9d883625594b..dba984337c13 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_converters.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_converters.go @@ -115,6 +115,10 @@ func newMachineFromUnstructured(u *unstructured.Unstructured) *Machine { machine.Status.NodeRef = &nodeRef } + if phase, _, _ := unstructured.NestedString(u.Object, "status", "phase"); phase != "" { + machine.Status.Phase = phase + } + return &machine } @@ -184,5 +188,7 @@ func newUnstructuredFromMachine(m *Machine) *unstructured.Unstructured { } } + unstructured.SetNestedField(u.Object, m.Status.Phase, "status", "phase") + return &u } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/machine_types.go b/cluster-autoscaler/cloudprovider/clusterapi/machine_types.go index 4f4e968ac20f..0b3b43e8fd97 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/machine_types.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/machine_types.go @@ -63,6 +63,11 @@ type MachineStatus struct { // NodeRef will point to the corresponding Node if it exists. // +optional NodeRef *corev1.ObjectReference `json:"nodeRef,omitempty"` + + // Phase represents the current phase of machine actuation. + // E.g. Pending, Running, Terminating, Failed etc. + // +optional + Phase string `json:"phase,omitempty"` } // MachineList contains a list of Machine