Skip to content

Commit

Permalink
backport of commit 129dda0
Browse files Browse the repository at this point in the history
  • Loading branch information
lgfa29 committed Mar 24, 2023
1 parent b7d3460 commit bcd49cb
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 21 deletions.
25 changes: 17 additions & 8 deletions scheduler/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -1078,13 +1078,21 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al
stop := make(allocSet)
reconnect := make(allocSet)

// Mark all failed reconnects for stop.
failedReconnects := reconnecting.filterByFailedReconnect()
stop = stop.union(failedReconnects)
a.markStop(failedReconnects, structs.AllocClientStatusFailed, allocRescheduled)
successfulReconnects := reconnecting.difference(failedReconnects)
for _, reconnectingAlloc := range reconnecting {
// Stop allocations that failed to reconnect.
reconnectFailed := !reconnectingAlloc.ServerTerminalStatus() &&
reconnectingAlloc.ClientStatus == structs.AllocClientStatusFailed

if reconnectFailed {
stop[reconnectingAlloc.ID] = reconnectingAlloc
a.result.stop = append(a.result.stop, allocStopResult{
alloc: reconnectingAlloc,
clientStatus: structs.AllocClientStatusFailed,
statusDescription: allocRescheduled,
})
continue
}

for _, reconnectingAlloc := range successfulReconnects {
// If the desired status is not run, or if the user-specified desired
// transition is not run, stop the reconnecting allocation.
stopReconnecting := reconnectingAlloc.DesiredStatus != structs.AllocDesiredStatusRun ||
Expand All @@ -1106,6 +1114,7 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al
// Find replacement allocations and decide which one to stop. A
// reconnecting allocation may have multiple replacements.
for _, replacementAlloc := range others {

// Skip allocations that are not a replacement of the one
// reconnecting. Replacement allocations have the same name but a
// higher CreateIndex and a different ID.
Expand All @@ -1116,7 +1125,7 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al
// Skip allocations that are server terminal.
// We don't want to replace a reconnecting allocation with one that
// is or will terminate and we don't need to stop them since they
// are marked as terminal by the servers.
// are already marked as terminal by the servers.
if !isReplacement || replacementAlloc.ServerTerminalStatus() {
continue
}
Expand Down Expand Up @@ -1146,7 +1155,7 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al
}

// Any reconnecting allocation not set to stop must be reconnected.
for _, alloc := range successfulReconnects {
for _, alloc := range reconnecting {
if _, ok := stop[alloc.ID]; !ok {
reconnect[alloc.ID] = alloc
}
Expand Down
13 changes: 0 additions & 13 deletions scheduler/reconcile_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,19 +520,6 @@ func (a allocSet) filterByDeployment(id string) (match, nonmatch allocSet) {
return
}

// filterByFailedReconnect returns the subset of allocations whose client
// status is failed but which do not yet have a terminal status at the server,
// so that they can be marked as stop at the server.
func (a allocSet) filterByFailedReconnect() allocSet {
	result := make(allocSet)
	for _, candidate := range a {
		// Skip allocations that are already terminal at the server.
		if candidate.ServerTerminalStatus() {
			continue
		}
		// Only allocations reported as failed by the client are collected.
		if candidate.ClientStatus != structs.AllocClientStatusFailed {
			continue
		}
		result[candidate.ID] = candidate
	}
	return result
}

// delayByStopAfterClientDisconnect returns a delay for any lost allocation that
// has stop_after_client_disconnect configured
func (a allocSet) delayByStopAfterClientDisconnect() (later []*delayedRescheduleInfo) {
Expand Down

0 comments on commit bcd49cb

Please sign in to comment.