Skip to content

Commit

Permalink
Handle timeout errors for CL2 large tests (#166)
Browse files Browse the repository at this point in the history
  • Loading branch information
njtran authored Sep 4, 2020
1 parent fd5eb04 commit 6bb8719
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions eks/cluster-loader/clusterloader2/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ import (
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
v1 "k8s.io/api/batch/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/retry"
)

// IndentedNewline covers formatting issues with gotemplates
Expand Down Expand Up @@ -69,19 +71,28 @@ func (c *ClusterLoader) Apply() (err error) {

// Wait for job to complete -- 2 hours because larger tests take a very long time.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Hour)
defer cancel()
job := &v1.Job{}
for job.Status.Succeeded < 1 {
job, err = c.K8sClient.KubernetesClientSet().
BatchV1().
Jobs("clusterloader2").
Get(ctx, "clusterloader2", metav1.GetOptions{})
if err != nil {
cancel()
return fmt.Errorf("failed to get cl2 job (%v)", err)
if errors.IsTimeout(err) {
err = retry.OnError(retry.DefaultRetry, errors.IsTimeout, func() error {
job, err = c.K8sClient.KubernetesClientSet().
BatchV1().
Jobs("clusterloader2").
Get(ctx, "clusterloader2", metav1.GetOptions{})
return fmt.Errorf("failed to get cl2 job (%v)", err)
})
} else if !errors.IsTimeout(err) {
return fmt.Errorf("failed to get cl2 job (%v)", err)
}
}
time.Sleep(10 * time.Second)
}
cancel()
return nil
}

Expand Down

0 comments on commit 6bb8719

Please sign in to comment.