From de7554e6b19a8619e7d505e84df7ea0184fc015f Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Tue, 6 Feb 2024 14:32:09 -0500 Subject: [PATCH] Do not set stdout error on EOF retry stdoutErr is used to determine the `errDetail` that is passed to `kw.UpdateBasicStatus(WorkStateFailed, errDetail, stdout.Size())`. In the case where we retried 5 times and did not read any new log messages, that is not an error; it is the expected happy path, so we should not set stdoutErr. --- pkg/workceptor/kubernetes.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pkg/workceptor/kubernetes.go b/pkg/workceptor/kubernetes.go index 225ec789a..a0d7c0031 100644 --- a/pkg/workceptor/kubernetes.go +++ b/pkg/workceptor/kubernetes.go @@ -241,7 +241,8 @@ func (kw *kubeUnit) kubeLoggingWithReconnect(streamWait *sync.WaitGroup, stdout kw.GetWorkceptor().nc.GetLogger().Info( "Context was canceled while reading logs for pod %s/%s. Assuming pod has finished", podNamespace, - podName) + podName, + ) return } @@ -260,9 +261,16 @@ func (kw *kubeUnit) kubeLoggingWithReconnect(streamWait *sync.WaitGroup, stdout break } - *stdoutErr = err + kw.GetWorkceptor().nc.GetLogger().Error("Error reading from pod %s/%s: %s", podNamespace, podName, err) + // At this point we exausted all retries, every retry we either failed to read OR we read but did not get newer msg + // If we got a EOF on the last retry we assume that we read everything and we can stop the loop + // we ASSUME this is the happy path. + if err != io.EOF { + *stdoutErr = err + } + return } @@ -487,6 +495,7 @@ func (kw *kubeUnit) runWorkUsingLogger() { if podName == "" { // create new pod if ked.PodName is empty + // TODO: add retry logic to make this more resilient to transient errors if err := kw.createPod(nil); err != nil { if err != ErrPodCompleted { errMsg := fmt.Sprintf("Error creating pod: %s", err)