Skip to content

Commit

Permalink
Improved context with timeout for DrainNode (#920)
Browse files: browse the repository at this point in the history
* improved drain timeout calculation

* removed select for context cancellation

* passing drain context only in drainNode

* removed effective drain timeout modification

* create drainContext in RunDrain

* remove time.Sleep
  • Loading branch information
sssash18 authored Jul 10, 2024
1 parent 2c1c4e4 commit e358db1
Showing 1 changed file with 8 additions and 10 deletions.
18 changes: 8 additions & 10 deletions pkg/util/provider/drain/drain.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -189,6 +189,7 @@ func NewDrainOptions(
// RunDrain runs the 'drain' command
func (o *Options) RunDrain(ctx context.Context) error {
o.drainStartedOn = time.Now()
drainContext, cancelFn := context.WithDeadline(ctx, o.drainStartedOn.Add(o.Timeout))
klog.V(4).Infof(
"Machine drain started on %s for %q",
o.drainStartedOn,
Expand All @@ -197,6 +198,7 @@ func (o *Options) RunDrain(ctx context.Context) error {

defer func() {
o.drainEndedOn = time.Now()
cancelFn()
klog.Infof(
"Machine drain ended on %s and took %s for %q",
o.drainEndedOn,
Expand All @@ -205,12 +207,12 @@ func (o *Options) RunDrain(ctx context.Context) error {
)
}()

if err := o.RunCordonOrUncordon(ctx, true); err != nil {
if err := o.RunCordonOrUncordon(drainContext, true); err != nil {
klog.Errorf("Drain Error: Cordoning of node failed with error: %v", err)
return err
}

err := o.deleteOrEvictPodsSimple(ctx)
err := o.deleteOrEvictPodsSimple(drainContext)
return err
}

Expand Down Expand Up @@ -653,16 +655,12 @@ func (o *Options) evictPodsWithPVInternal(
returnCh chan error,
) (remainingPods []*corev1.Pod, fastTrack bool) {
var (
mainContext context.Context
cancelMainContext context.CancelFunc
retryPods []*corev1.Pod
retryPods []*corev1.Pod
)
mainContext, cancelMainContext = context.WithDeadline(ctx, o.drainStartedOn.Add(o.Timeout))
defer cancelMainContext()

for i, pod := range pods {
select {
case <-mainContext.Done():
case <-ctx.Done():
// Timeout occurred. Abort and report the remaining pods.
returnCh <- nil
return append(retryPods, pods[i+1:]...), true
Expand Down Expand Up @@ -739,7 +737,7 @@ func (o *Options) evictPodsWithPVInternal(
)

podVolumeInfo := podVolumeInfoMap[getPodKey(pod)]
ctx, cancelFn := context.WithTimeout(mainContext, o.getTerminationGracePeriod(pod)+o.PvDetachTimeout)
ctx, cancelFn := context.WithTimeout(ctx, o.getTerminationGracePeriod(pod)+o.PvDetachTimeout)
err = o.waitForDetach(ctx, podVolumeInfo, o.nodeName)
cancelFn()

Expand All @@ -762,7 +760,7 @@ func (o *Options) evictPodsWithPVInternal(
time.Since(podEvictionStartTime),
)

ctx, cancelFn = context.WithTimeout(mainContext, o.PvReattachTimeout)
ctx, cancelFn = context.WithTimeout(ctx, o.PvReattachTimeout)
err = o.waitForReattach(ctx, podVolumeInfo, o.nodeName, volumeAttachmentEventCh)
cancelFn()

Expand Down

0 comments on commit e358db1

Please sign in to comment.