Skip to content

Commit

Permalink
Merge pull request #78 from JoelSpeed/replace-deleted-machines
Browse files Browse the repository at this point in the history
[OCPCLOUD-1650] Ensure we replace deleted machines in the RollingUpdate strategy
  • Loading branch information
openshift-merge-robot authored Aug 8, 2022
2 parents 711a28c + 19f2334 commit 4260e66
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 21 deletions.
58 changes: 37 additions & 21 deletions pkg/controllers/controlplanemachineset/updates.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func (r *ControlPlaneMachineSetReconciler) reconcileMachineOnDeleteUpdate(ctx co

func (r *ControlPlaneMachineSetReconciler) waitForPendingMachines(logger logr.Logger, machines []machineproviders.MachineInfo) bool {
machinesPending := pendingMachines(machines)
machinesNeedingUpdate := needUpdateMachines(machines)
machinesNeedingReplacement := needReplacementMachines(machines)
machinesReady := readyMachines(machines)

// Find out if and what Machines in this index need an update.
Expand All @@ -219,13 +219,13 @@ func (r *ControlPlaneMachineSetReconciler) waitForPendingMachines(logger logr.Lo
return true
}

if hasAny(machinesNeedingUpdate) && hasAny(machinesPending) {
if hasAny(machinesNeedingReplacement) && hasAny(machinesPending) {
// A Pending Machine Replacement already exists.
// Wait for it to become Ready.
// Consider the first found pending machine for this index to be the replacement machine.
replacementMachine := machinesPending[0]
// Consider the first found outdated machine for this index to be the one in need of update.
outdatedMachine := machinesNeedingUpdate[0]
outdatedMachine := machinesNeedingReplacement[0]

logger := logger.WithValues("index", int(outdatedMachine.Index), "namespace", r.Namespace, "name", outdatedMachine.MachineRef.ObjectMeta.Name)
logger.V(2).WithValues("replacementName", replacementMachine.MachineRef.ObjectMeta.Name).Info(waitingForReplacement)
Expand All @@ -237,17 +237,17 @@ func (r *ControlPlaneMachineSetReconciler) waitForPendingMachines(logger logr.Lo
}

func (r *ControlPlaneMachineSetReconciler) deleteReplacedMachines(ctx context.Context, logger logr.Logger, machineProvider machineproviders.MachineProvider, machines []machineproviders.MachineInfo) (bool, ctrl.Result, error) {
machinesNeedingUpdate := needUpdateMachines(machines)
machinesNeedingReplacement := needReplacementMachines(machines)
machinesUpdated := updatedMachines(machines)
machinesOutdatedNonReady := nonReadyMachines(machinesNeedingUpdate)
machinesOutdatedNonReady := nonReadyMachines(machinesNeedingReplacement)

var toDeleteMachine machineproviders.MachineInfo

if hasAny(machinesNeedingUpdate) && hasAny(machinesUpdated) {
if hasAny(machinesNeedingReplacement) && hasAny(machinesUpdated) {
// The Outdated Machine still exists for this index,
// but an Updated replacement exists for it.
// Thus it is safe to trigger its Deletion.
toDeleteMachine = machinesNeedingUpdate[0]
toDeleteMachine = machinesNeedingReplacement[0]
}

if hasAny(machinesOutdatedNonReady) {
Expand Down Expand Up @@ -283,9 +283,9 @@ func (r *ControlPlaneMachineSetReconciler) deleteReplacedMachines(ctx context.Co
}

func (r *ControlPlaneMachineSetReconciler) createReplacementMachines(ctx context.Context, logger logr.Logger, machineProvider machineproviders.MachineProvider, machines []machineproviders.MachineInfo, idx int, maxSurge int, surgeCount *int) (bool, ctrl.Result, error) {
machinesNeedingUpdate := needUpdateMachines(machines)
machinesNeedingReplacement := needReplacementMachines(machines)
machinesPending := pendingMachines(machines)
machinesUpdated := updatedMachines(machines)
machinesUpdatedNonDeleted := updatedNonDeletedMachines(machines)

if isEmpty(machines) {
// No Machines exist for this index.
Expand All @@ -300,12 +300,12 @@ func (r *ControlPlaneMachineSetReconciler) createReplacementMachines(ctx context
return true, result, nil
}

if hasAny(machinesNeedingUpdate) && isEmpty(machinesUpdated) && isEmpty(machinesPending) {
// A Machine for this index needs updating.
// No Updated or Pending (Updated, Non-Ready) Replacement Machine exist for it.
if hasAny(machinesNeedingReplacement) && isEmpty(machinesUpdatedNonDeleted) && isEmpty(machinesPending) {
// A Machine for this index needs updating (or has been deleted).
// No Updated (non-terminated) or Pending (Updated, Non-Ready) Replacement Machine exist for it.
// Trigger a Machine creation.
// Consider the first found outdated machine for this index to be the one in need of update.
outdatedMachine := machinesNeedingUpdate[0]
outdatedMachine := machinesNeedingReplacement[0]
logger := logger.WithValues("index", int(outdatedMachine.Index), "namespace", r.Namespace, "name", outdatedMachine.MachineRef.ObjectMeta.Name)

result, err := createMachine(ctx, logger, machineProvider, outdatedMachine.Index, maxSurge, surgeCount)
Expand Down Expand Up @@ -363,25 +363,27 @@ func isDeletedMachine(m machineproviders.MachineInfo) bool {
return m.MachineRef.ObjectMeta.DeletionTimestamp != nil
}

// needUpdateMachines returns the list of MachineInfo which have Machines that need an update.
func needUpdateMachines(machinesInfo []machineproviders.MachineInfo) []machineproviders.MachineInfo {
needUpdate := []machineproviders.MachineInfo{}
// needReplacementMachines returns the list of MachineInfo which have Machines that need an update or have
// been deleted.
func needReplacementMachines(machinesInfo []machineproviders.MachineInfo) []machineproviders.MachineInfo {
needsReplacement := []machineproviders.MachineInfo{}

for _, m := range machinesInfo {
if m.NeedsUpdate {
needUpdate = append(needUpdate, m)
if m.NeedsUpdate || isDeletedMachine(m) {
needsReplacement = append(needsReplacement, m)
}
}

return needUpdate
return needsReplacement
}

// pendingMachines returns the list of MachineInfo which have a Pending Machine.
// pendingMachines returns the list of MachineInfo which have a Pending Machine and are not pending deletion.
// A Machine pending deletion should not be considered pending as it will never progress into a Ready Machine.
func pendingMachines(machinesInfo []machineproviders.MachineInfo) []machineproviders.MachineInfo {
result := []machineproviders.MachineInfo{}

for i := range machinesInfo {
if !machinesInfo[i].Ready && !machinesInfo[i].NeedsUpdate {
if !machinesInfo[i].Ready && !machinesInfo[i].NeedsUpdate && !isDeletedMachine(machinesInfo[i]) {
result = append(result, machinesInfo[i])
}
}
Expand All @@ -402,6 +404,20 @@ func updatedMachines(machinesInfo []machineproviders.MachineInfo) []machineprovi
return result
}

// updatedNonDeletedMachines returns the list of MachineInfo which have an Updated (Spec up-to-date and Ready) Machine and
// are not pending deletion.
func updatedNonDeletedMachines(machinesInfo []machineproviders.MachineInfo) []machineproviders.MachineInfo {
result := []machineproviders.MachineInfo{}

for i := range machinesInfo {
if machinesInfo[i].Ready && !machinesInfo[i].NeedsUpdate && !isDeletedMachine(machinesInfo[i]) {
result = append(result, machinesInfo[i])
}
}

return result
}

// readyMachines returns the list of MachineInfo which have a Ready Machine.
func readyMachines(machinesInfo []machineproviders.MachineInfo) []machineproviders.MachineInfo {
result := []machineproviders.MachineInfo{}
Expand Down
111 changes: 111 additions & 0 deletions pkg/controllers/controlplanemachineset/updates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,117 @@ var _ = Describe("reconcileMachineUpdates", func() {
}
},
}),
Entry("with no updates required, but a machine has been deleted", rollingUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
0: {updatedMachineBuilder.WithIndex(0).WithMachineName("machine-0").WithNodeName("node-0").Build()},
1: {updatedMachineBuilder.WithIndex(1).WithMachineName("machine-1").WithNodeName("node-1").WithMachineDeletionTimestamp(metav1.Now()).Build()},
2: {updatedMachineBuilder.WithIndex(2).WithMachineName("machine-2").WithNodeName("node-2").Build()},
},
setupMock: func() {
mockMachineProvider.EXPECT().CreateMachine(gomock.Any(), gomock.Any(), int32(1)).Times(1)
mockMachineProvider.EXPECT().DeleteMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
},
expectedLogsBuilder: func() []test.LogEntry {
return []test.LogEntry{
// We wouldn't normally continue operation when a Machine is pending removal, however,
// when a user has manually deleted a Machine and the etcd deletion hook is present,
// we still need to handle creating the replacement Machine to unblock the rollout.
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", 1,
"namespace", namespaceName,
"name", "machine-1",
},
Message: waitingForRemoved,
},
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", 1,
"namespace", namespaceName,
"name", "machine-1",
},
Message: createdReplacement,
},
}
},
}),
Entry("with no updates required, but a machine has been deleted, and its replacement is pending", rollingUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
0: {updatedMachineBuilder.WithIndex(0).WithMachineName("machine-0").WithNodeName("node-0").Build()},
1: {
updatedMachineBuilder.WithIndex(1).WithMachineName("machine-1").WithNodeName("node-1").WithMachineDeletionTimestamp(metav1.Now()).Build(),
pendingMachineBuilder.WithIndex(1).WithMachineName("machine-replacement-1").Build(),
},
2: {updatedMachineBuilder.WithIndex(2).WithMachineName("machine-2").WithNodeName("node-2").Build()},
},
setupMock: func() {
mockMachineProvider.EXPECT().CreateMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
mockMachineProvider.EXPECT().DeleteMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
},
expectedLogsBuilder: func() []test.LogEntry {
return []test.LogEntry{
// We wouldn't normally continue operation when a Machine is pending removal, however,
// when a user has manually deleted a Machine and the etcd deletion hook is present,
// we still need to handle creating the replacement Machine to unblock the rollout.
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", 1,
"namespace", namespaceName,
"name", "machine-1",
},
Message: waitingForRemoved,
},
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", 1,
"namespace", namespaceName,
"name", "machine-1",
"replacementName", "machine-replacement-1",
},
Message: waitingForReplacement,
},
}
},
}),
Entry("with no updates required, but a machine has been deleted, and its replacement is ready", rollingUpdateTableInput{
cpmsBuilder: cpmsBuilder.WithReplicas(3),
machineInfos: map[int32][]machineproviders.MachineInfo{
0: {updatedMachineBuilder.WithIndex(0).WithMachineName("machine-0").WithNodeName("node-0").Build()},
1: {
updatedMachineBuilder.WithIndex(1).WithMachineName("machine-1").WithNodeName("node-1").WithMachineDeletionTimestamp(metav1.Now()).Build(),
updatedMachineBuilder.WithIndex(1).WithMachineName("machine-replacement-1").WithNodeName("node-replacement-1").Build(),
},
2: {updatedMachineBuilder.WithIndex(2).WithMachineName("machine-2").WithNodeName("node-2").Build()},
},
setupMock: func() {
mockMachineProvider.EXPECT().CreateMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
mockMachineProvider.EXPECT().DeleteMachine(gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
},
expectedLogsBuilder: func() []test.LogEntry {
return []test.LogEntry{
{
Level: 2,
KeysAndValues: []interface{}{
"updateStrategy", machinev1.RollingUpdate,
"index", 1,
"namespace", namespaceName,
"name", "machine-1",
},
Message: waitingForRemoved,
},
}
},
}),
)
})

Expand Down

0 comments on commit 4260e66

Please sign in to comment.