From ac6d5232df7a6781e45ccf6063acecfab664c825 Mon Sep 17 00:00:00 2001 From: Xinru Zhang Date: Wed, 7 Dec 2022 07:57:51 -0500 Subject: [PATCH] TEP-0121: Reconciler Implementation Prior to this commit, `retries` logic for TaskRun is handled by Tekton `PipelineRun` reconciler. This commit delegates the retries implementation for `TaskRun` to the `TaskRun` reconciler. The major change is we stopped relying on `len(retriesStatus)` to decide if a target `TaskRun` failed or not. Instead, we use status `ConditionSuceeded` to gate the completion of a `TaskRun`. Even a `TaskRun` failed on one execution, as long as it has remaining retries, the TaskRun won't be stored in etcd with its status set as `Failed`. Instead, the status will be: ```yaml Type: Succeeded Status: Unknown Reason: ToBeRetried ``` --- pkg/pod/status.go | 24 +- pkg/pod/status_test.go | 37 +- pkg/reconciler/pipelinerun/pipelinerun.go | 31 +- .../pipelinerun/pipelinerun_test.go | 124 ++----- .../resources/pipelinerunresolution.go | 32 +- .../resources/pipelinerunresolution_test.go | 215 ++--------- .../pipelinerun/resources/pipelinerunstate.go | 45 --- .../resources/pipelinerunstate_test.go | 143 +------- pkg/reconciler/taskrun/taskrun.go | 22 +- pkg/reconciler/taskrun/taskrun_test.go | 343 +++++++++++++++++- 10 files changed, 478 insertions(+), 538 deletions(-) diff --git a/pkg/pod/status.go b/pkg/pod/status.go index 0a0894981eb..7206f80c567 100644 --- a/pkg/pod/status.go +++ b/pkg/pod/status.go @@ -42,7 +42,7 @@ const ( ReasonFailedResolution = "TaskRunResolutionFailed" // ReasonFailedValidation indicated that the reason for failure status is - // that taskrun failed runtime validation + // that TaskRun failed runtime validation ReasonFailedValidation = "TaskRunValidationFailed" // ReasonExceededResourceQuota indicates that the TaskRun failed to create a pod due to @@ -162,12 +162,12 @@ func setTaskRunStatusBasedOnStepStatus(logger *zap.SugaredLogger, stepStatuses [ } else { time, err := extractStartedAtTimeFromResults(results) if err != nil { - logger.Errorf("error setting the start time of step %q in taskrun %q: %v", s.Name, tr.Name, err) + logger.Errorf("error setting the start time of step %q in TaskRun %q: %v", s.Name, tr.Name, err) merr = multierror.Append(merr, err) } exitCode, err := extractExitCodeFromResults(results) if err != nil { - logger.Errorf("error extracting the exit code of step %q in taskrun %q: %v", s.Name, tr.Name, err) + logger.Errorf("error extracting the exit code of step %q in TaskRun %q: %v", s.Name, tr.Name, err) merr = multierror.Append(merr, err) } taskResults, pipelineResourceResults, filteredResults := filterResultsAndResources(results) @@ -335,7 +335,7 @@ func updateIncompleteTaskRunStatus(trs *v1beta1.TaskRunStatus, pod *corev1.Pod) } } -// DidTaskRunFail check the status of pod to decide if related taskrun is failed +// DidTaskRunFail check the status of pod to decide if related TaskRun is failed func DidTaskRunFail(pod *corev1.Pod) bool { f := pod.Status.Phase == corev1.PodFailed for _, s := range pod.Status.ContainerStatuses { @@ -348,6 +348,16 @@ func DidTaskRunFail(pod *corev1.Pod) bool { return f } +// IsPodArchived indicates if a pod is archived in the retriesStatus. +func IsPodArchived(pod *corev1.Pod, trs *v1beta1.TaskRunStatus) bool { + for _, retryStatus := range trs.RetriesStatus { + if retryStatus.PodName == pod.GetName() { + return true + } + } + return false +} + func areStepsComplete(pod *corev1.Pod) bool { stepsComplete := len(pod.Status.ContainerStatuses) > 0 && pod.Status.Phase == corev1.PodRunning for _, s := range pod.Status.ContainerStatuses { @@ -478,7 +488,7 @@ func getWaitingMessage(pod *corev1.Pod) string { return "Pending" } -// markStatusRunning sets taskrun status to running +// markStatusRunning sets TaskRun status to running func markStatusRunning(trs *v1beta1.TaskRunStatus, reason, message string) { trs.SetCondition(&apis.Condition{ Type: apis.ConditionSucceeded, @@ -488,7 +498,7 @@ func markStatusRunning(trs *v1beta1.TaskRunStatus, reason, message string) { }) } -// markStatusFailure sets taskrun status to failure with specified reason +// markStatusFailure sets TaskRun status to failure with specified reason func markStatusFailure(trs *v1beta1.TaskRunStatus, reason string, message string) { trs.SetCondition(&apis.Condition{ Type: apis.ConditionSucceeded, @@ -498,7 +508,7 @@ func markStatusFailure(trs *v1beta1.TaskRunStatus, reason string, message string }) } -// markStatusSuccess sets taskrun status to success +// markStatusSuccess sets TaskRun status to success func markStatusSuccess(trs *v1beta1.TaskRunStatus) { trs.SetCondition(&apis.Condition{ Type: apis.ConditionSucceeded, diff --git a/pkg/pod/status_test.go b/pkg/pod/status_test.go index c44f2b679ac..1a8aa7a632f 100644 --- a/pkg/pod/status_test.go +++ b/pkg/pod/status_test.go @@ -98,7 +98,6 @@ func TestMakeTaskRunStatus(t *testing.T) { }{{ desc: "empty", podStatus: corev1.PodStatus{}, - want: v1beta1.TaskRunStatus{ Status: statusRunning(), TaskRunStatusFields: v1beta1.TaskRunStatusFields{ @@ -1551,6 +1550,42 @@ func TestMarkStatusSuccess(t *testing.T) { } } +func TestIsPodArchived(t *testing.T) { + for _, tc := range []struct { + name string + podName string + retriesStatus []v1beta1.TaskRunStatus + wantIsArchived bool + }{{ + name: "Pod is not in the retriesStatus", + podName: "pod", + retriesStatus: []v1beta1.TaskRunStatus{}, + wantIsArchived: false, + }, { + name: "Pod is in the retriesStatus", + podName: "pod", + retriesStatus: []v1beta1.TaskRunStatus{{ + TaskRunStatusFields: v1beta1.TaskRunStatusFields{ + PodName: "pod", + }}, + }, + wantIsArchived: true, + }} { + t.Run(tc.name, func(t *testing.T) { + trs := v1beta1.TaskRunStatus{ + TaskRunStatusFields: v1beta1.TaskRunStatusFields{ + PodName: "pod", + RetriesStatus: tc.retriesStatus, + }, + } + gotArchived := IsPodArchived(&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: tc.podName}}, &trs) + if tc.wantIsArchived != gotArchived { + t.Errorf("IsPodArchived(): %v, expect %v", gotArchived, tc.wantIsArchived) + } + }) + } +} + func statusRunning() duckv1.Status { var trs v1beta1.TaskRunStatus markStatusRunning(&trs, v1beta1.TaskRunReasonRunning.String(), "Not all Steps in the Task have finished executing") diff --git a/pkg/reconciler/pipelinerun/pipelinerun.go b/pkg/reconciler/pipelinerun/pipelinerun.go index 5d8dbfbd9bd..cfb819757b6 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun.go +++ b/pkg/reconciler/pipelinerun/pipelinerun.go @@ -850,26 +850,10 @@ func (c *Reconciler) createTaskRuns(ctx context.Context, rpt *resources.Resolved func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, params []v1beta1.Param, rpt *resources.ResolvedPipelineTask, pr *v1beta1.PipelineRun, storageBasePath string) (*v1beta1.TaskRun, error) { logger := logging.FromContext(ctx) - tr, _ := c.taskRunLister.TaskRuns(pr.Namespace).Get(taskRunName) - if tr != nil { - // retry should happen only when the taskrun has failed - if !tr.Status.GetCondition(apis.ConditionSucceeded).IsFalse() { - return tr, nil - } - // Don't modify the lister cache's copy. - tr = tr.DeepCopy() - // is a retry - addRetryHistory(tr) - clearStatus(tr) - tr.Status.MarkResourceOngoing("", "") - logger.Infof("Updating taskrun %s with cleared status and retry history (length: %d).", tr.GetName(), len(tr.Status.RetriesStatus)) - return c.PipelineClientSet.TektonV1beta1().TaskRuns(pr.Namespace).UpdateStatus(ctx, tr, metav1.UpdateOptions{}) - } - rpt.PipelineTask = resources.ApplyPipelineTaskContexts(rpt.PipelineTask) taskRunSpec := pr.GetTaskRunSpec(rpt.PipelineTask.Name) params = append(params, rpt.PipelineTask.Params...) - tr = &v1beta1.TaskRun{ + tr := &v1beta1.TaskRun{ ObjectMeta: metav1.ObjectMeta{ Name: taskRunName, Namespace: pr.Namespace, @@ -878,6 +862,7 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para Annotations: combineTaskRunAndTaskSpecAnnotations(pr, rpt.PipelineTask), }, Spec: v1beta1.TaskRunSpec{ + Retries: rpt.PipelineTask.Retries, Params: params, ServiceAccountName: taskRunSpec.TaskServiceAccountName, PodTemplate: taskRunSpec.TaskPodTemplate, @@ -1092,18 +1077,6 @@ func combinedSubPath(workspaceSubPath string, pipelineTaskSubPath string) string return filepath.Join(workspaceSubPath, pipelineTaskSubPath) } -func addRetryHistory(tr *v1beta1.TaskRun) { - newStatus := *tr.Status.DeepCopy() - newStatus.RetriesStatus = nil - tr.Status.RetriesStatus = append(tr.Status.RetriesStatus, newStatus) -} - -func clearStatus(tr *v1beta1.TaskRun) { - tr.Status.StartTime = nil - tr.Status.CompletionTime = nil - tr.Status.PodName = "" -} - func getTaskrunAnnotations(pr *v1beta1.PipelineRun) map[string]string { // Propagate annotations from PipelineRun to TaskRun. annotations := make(map[string]string, len(pr.ObjectMeta.Annotations)+1) diff --git a/pkg/reconciler/pipelinerun/pipelinerun_test.go b/pkg/reconciler/pipelinerun/pipelinerun_test.go index 1de4a6d6de7..babfb52fb64 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun_test.go +++ b/pkg/reconciler/pipelinerun/pipelinerun_test.go @@ -494,6 +494,7 @@ spec: value: test-pipeline - name: contextRetriesParam value: "5" + retries: 5 resources: inputs: - name: workspace @@ -3335,103 +3336,6 @@ spec: } } -// TestReconcileWithTimeoutAndRetry runs "Reconcile" against pipelines with -// retries and timeout settings, and status that represents different number of -// retries already performed. It verifies the reconciled status and events -// generated -func TestReconcileWithTimeoutAndRetry(t *testing.T) { - for _, tc := range []struct { - name string - retries int - conditionSucceeded corev1.ConditionStatus - wantEvents []string - }{{ - name: "One try has to be done", - retries: 1, - conditionSucceeded: corev1.ConditionFalse, - wantEvents: []string{ - "Warning Failed PipelineRun \"test-pipeline-retry-run-with-timeout\" failed to finish within", - }, - }, { - name: "No more retries are needed", - retries: 2, - conditionSucceeded: corev1.ConditionUnknown, - wantEvents: []string{ - "Warning Failed PipelineRun \"test-pipeline-retry-run-with-timeout\" failed to finish within", - }, - }} { - t.Run(tc.name, func(t *testing.T) { - ps := []*v1beta1.Pipeline{parse.MustParseV1beta1Pipeline(t, fmt.Sprintf(` -metadata: - name: test-pipeline-retry - namespace: foo -spec: - tasks: - - name: hello-world-1 - retries: %d - taskRef: - name: hello-world -`, tc.retries))} - prs := []*v1beta1.PipelineRun{parse.MustParseV1beta1PipelineRun(t, ` -metadata: - name: test-pipeline-retry-run-with-timeout - namespace: foo -spec: - pipelineRef: - name: test-pipeline-retry - serviceAccountName: test-sa - timeout: 12h0m0s -status: - startTime: "2021-12-31T00:00:00Z" -`)} - - ts := []*v1beta1.Task{ - simpleHelloWorldTask, - } - trs := []*v1beta1.TaskRun{parse.MustParseV1beta1TaskRun(t, ` -metadata: - name: hello-world-1 - namespace: foo -status: - conditions: - - status: "False" - type: Succeeded - podName: my-pod-name - retriesStatus: - - conditions: - - status: "False" - type: Succeeded -`)} - - prtrs := &v1beta1.PipelineRunTaskRunStatus{ - PipelineTaskName: "hello-world-1", - Status: &trs[0].Status, - } - prs[0].Status.TaskRuns = make(map[string]*v1beta1.PipelineRunTaskRunStatus) - prs[0].Status.TaskRuns["hello-world-1"] = prtrs - - d := test.Data{ - PipelineRuns: prs, - Pipelines: ps, - Tasks: ts, - TaskRuns: trs, - } - prt := newPipelineRunTest(d, t) - defer prt.Cancel() - - reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-retry-run-with-timeout", []string{}, false) - - if len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus) != tc.retries { - t.Fatalf(" %d retries expected but got %d ", tc.retries, len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus)) - } - - if status := reconciledRun.Status.TaskRuns["hello-world-1"].Status.GetCondition(apis.ConditionSucceeded).Status; status != tc.conditionSucceeded { - t.Fatalf("Succeeded expected to be %s but is %s", tc.conditionSucceeded, status) - } - }) - } -} - // TestReconcileAndPropagateCustomPipelineTaskRunSpec tests that custom PipelineTaskRunSpec declared // in PipelineRun is propagated to created TaskRuns func TestReconcileAndPropagateCustomPipelineTaskRunSpec(t *testing.T) { @@ -9464,6 +9368,7 @@ spec: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: linux @@ -9478,12 +9383,17 @@ status: conditions: - type: Succeeded status: "False" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded `), mustParseTaskRunWithObjectMeta(t, taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: mac @@ -9598,6 +9508,7 @@ status: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: linux @@ -9611,7 +9522,7 @@ spec: status: conditions: - type: Succeeded - status: "Unknown" + status: "False" retriesStatus: - conditions: - status: "False" @@ -9622,6 +9533,7 @@ status: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: mac @@ -9646,6 +9558,7 @@ status: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: linux @@ -9659,13 +9572,18 @@ spec: status: conditions: - type: Succeeded - status: "False" + status: "Unknown" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded `), mustParseTaskRunWithObjectMeta(t, taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: mac @@ -9679,7 +9597,11 @@ spec: status: conditions: - type: Succeeded - status: "False" + status: "Unknown" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded `), }, prs: []*v1beta1.PipelineRun{ @@ -9780,6 +9702,7 @@ status: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: linux @@ -9804,6 +9727,7 @@ status: "pr", "p", "platforms-and-browsers", false), ` spec: + retries: 1 params: - name: platform value: mac diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go index 0566eff244c..7816b7d7dbf 100644 --- a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go +++ b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go @@ -167,7 +167,7 @@ func (t ResolvedPipelineTask) isFailure() bool { atLeastOneFailed := false for _, run := range t.Runs { isDone = isDone && run.IsDone() - runFailed := run.Status.GetCondition(apis.ConditionSucceeded).IsFalse() && !t.hasRemainingRetries() + runFailed := run.Status.GetCondition(apis.ConditionSucceeded).IsFalse() && !t.isCustomTaskRetriable() atLeastOneFailed = atLeastOneFailed || runFailed } return atLeastOneFailed && isDone @@ -177,6 +177,7 @@ func (t ResolvedPipelineTask) isFailure() bool { } c = t.Run.Status.GetCondition(apis.ConditionSucceeded) isDone = t.Run.IsDone() + return isDone && c.IsFalse() && !t.isCustomTaskRetriable() case t.IsMatrixed(): if len(t.TaskRuns) == 0 { return false @@ -185,7 +186,7 @@ func (t ResolvedPipelineTask) isFailure() bool { atLeastOneFailed := false for _, taskRun := range t.TaskRuns { isDone = isDone && taskRun.IsDone() - taskRunFailed := taskRun.Status.GetCondition(apis.ConditionSucceeded).IsFalse() && !t.hasRemainingRetries() + taskRunFailed := taskRun.Status.GetCondition(apis.ConditionSucceeded).IsFalse() atLeastOneFailed = atLeastOneFailed || taskRunFailed } return atLeastOneFailed && isDone @@ -195,13 +196,15 @@ func (t ResolvedPipelineTask) isFailure() bool { } c = t.TaskRun.Status.GetCondition(apis.ConditionSucceeded) isDone = t.TaskRun.IsDone() + return isDone && c.IsFalse() } - return isDone && c.IsFalse() && !t.hasRemainingRetries() } -// hasRemainingRetries returns true only when the number of retries already attempted -// is less than the number of retries allowed. -func (t ResolvedPipelineTask) hasRemainingRetries() bool { +// isCustomTaskRetriable returns true only when the number of retries already attempted +// is less than the number of retries allowed for v1alpha1.Run. +// +// This should be removed once v1alpha1.Run is fully deprecated. +func (t ResolvedPipelineTask) isCustomTaskRetriable() bool { var retriesDone int switch { case t.IsCustomTask() && t.IsMatrixed(): @@ -221,23 +224,8 @@ func (t ResolvedPipelineTask) hasRemainingRetries() bool { return true } retriesDone = len(t.Run.Status.RetriesStatus) - case t.IsMatrixed(): - if len(t.TaskRuns) == 0 { - return true - } - // has remaining retries when any TaskRun has a remaining retry - for _, taskRun := range t.TaskRuns { - retriesDone = len(taskRun.Status.RetriesStatus) - if retriesDone < t.PipelineTask.Retries { - return true - } - } - return false default: - if t.TaskRun == nil { - return true - } - retriesDone = len(t.TaskRun.Status.RetriesStatus) + return false } return retriesDone < t.PipelineTask.Retries } diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go index 4af245bd31a..ccfcd48954d 100644 --- a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go +++ b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go @@ -465,22 +465,6 @@ var finalScheduledState = PipelineRunState{{ }, }} -var retryableFinalState = PipelineRunState{{ - PipelineTask: &pts[0], - TaskRunName: "pipelinerun-mytask1", - TaskRun: makeSucceeded(trs[0]), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, -}, { - PipelineTask: &pts[3], - TaskRunName: "pipelinerun-mytask4", - TaskRun: makeFailed(trs[2]), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, -}} - var allFinishedState = PipelineRunState{{ PipelineTask: &pts[0], TaskRunName: "pipelinerun-mytask1", @@ -1294,23 +1278,7 @@ func TestIsFailure(t *testing.T) { }, want: true, }, { - name: "taskrun failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - TaskRun: makeFailed(trs[0]), - }, - want: false, - }, { - - name: "run failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - CustomTask: true, - Run: makeRunFailed(runs[0]), - }, - want: false, - }, { - name: "taskrun failed: no retries remaining", + name: "taskrun failed - Retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withRetries(makeFailed(trs[0])), @@ -1318,7 +1286,7 @@ func TestIsFailure(t *testing.T) { want: true, }, { - name: "run failed: no retries remaining", + name: "run failed - Retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, CustomTask: true, @@ -1386,14 +1354,14 @@ func TestIsFailure(t *testing.T) { }, want: true, }, { - name: "taskrun cancelled: no retries remaining", + name: "taskrun cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withCancelled(withRetries(makeFailed(trs[0]))), }, want: true, }, { - name: "run cancelled: no retries remaining", + name: "run cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, Run: withRunCancelled(withRunRetries(makeRunFailed(runs[0]))), @@ -1496,44 +1464,14 @@ func TestIsFailure(t *testing.T) { }, want: false, }, { - name: "matrixed taskruns failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), makeFailed(trs[1])}, - }, - want: false, - }, { - name: "matrixed runs failed: retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), makeRunFailed(runs[1])}, - }, - want: false, - }, { - name: "matrixed taskruns failed: one taskrun with retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), withRetries(makeFailed(trs[1]))}, - }, - want: false, - }, { - name: "matrixed runs failed: one run with retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), withRunRetries(makeRunFailed(runs[1]))}, - }, - want: false, - }, { - name: "matrixed taskruns failed: no retries remaining", + name: "matrixed taskruns failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withRetries(makeFailed(trs[0])), withRetries(makeFailed(trs[1]))}, }, want: true, }, { - name: "matrixed runs failed: no retries remaining", + name: "matrixed runs failed: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -1631,14 +1569,14 @@ func TestIsFailure(t *testing.T) { }, want: false, }, { - name: "matrixed taskruns cancelled: no retries remaining", + name: "matrixed taskruns cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), withCancelled(withRetries(makeFailed(trs[1])))}, }, want: true, }, { - name: "matrixed runs cancelled: no retries remaining", + name: "matrixed runs cancelled: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -1646,14 +1584,14 @@ func TestIsFailure(t *testing.T) { }, want: true, }, { - name: "one matrixed taskrun cancelled: no retries remaining, one matrixed taskrun running", + name: "one matrixed taskrun cancelled: retried, one matrixed taskrun running", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), makeStarted(trs[1])}, }, want: false, }, { - name: "one matrixed run cancelled: no retries remaining, one matrixed run running", + name: "one matrixed run cancelled: retried, one matrixed run running", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -3945,31 +3883,14 @@ func TestIsSuccessful(t *testing.T) { }, want: false, }, { - name: "taskrun failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - TaskRun: makeFailed(trs[0]), - }, - want: false, - }, { - - name: "run failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - CustomTask: true, - Run: makeRunFailed(runs[0]), - }, - want: false, - }, { - name: "taskrun failed: no retries remaining", + name: "taskrun failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withRetries(makeFailed(trs[0])), }, want: false, }, { - - name: "run failed: no retries remaining", + name: "run failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, CustomTask: true, @@ -4022,14 +3943,14 @@ func TestIsSuccessful(t *testing.T) { }, want: false, }, { - name: "taskrun cancelled: no retries remaining", + name: "taskrun cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withCancelled(withRetries(makeFailed(trs[0]))), }, want: false, }, { - name: "run cancelled: no retries remaining", + name: "run cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, Run: withRunCancelled(withRunRetries(makeRunFailed(runs[0]))), @@ -4140,44 +4061,14 @@ func TestIsSuccessful(t *testing.T) { }, want: false, }, { - name: "matrixed taskruns failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), makeFailed(trs[1])}, - }, - want: false, - }, { - name: "matrixed runs failed: retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), makeRunFailed(runs[1])}, - }, - want: false, - }, { - name: "matrixed taskruns failed: one taskrun with retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), withRetries(makeFailed(trs[1]))}, - }, - want: false, - }, { - name: "matrixed runs failed: one run with retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), withRunRetries(makeRunFailed(runs[1]))}, - }, - want: false, - }, { - name: "matrixed taskruns failed: no retries remaining", + name: "matrixed taskruns failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withRetries(makeFailed(trs[0])), withRetries(makeFailed(trs[1]))}, }, want: false, }, { - name: "matrixed runs failed: no retries remaining", + name: "matrixed runs failed: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -4275,14 +4166,14 @@ func TestIsSuccessful(t *testing.T) { }, want: false, }, { - name: "matrixed taskruns cancelled: no retries remaining", + name: "matrixed taskruns cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), withCancelled(withRetries(makeFailed(trs[1])))}, }, want: false, }, { - name: "matrixed runs cancelled: no retries remaining", + name: "matrixed runs cancelled: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -4290,14 +4181,14 @@ func TestIsSuccessful(t *testing.T) { }, want: false, }, { - name: "one matrixed taskrun cancelled: no retries remaining, one matrixed taskrun running", + name: "one matrixed taskrun cancelled: retried, one matrixed taskrun running", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), makeStarted(trs[1])}, }, want: false, }, { - name: "one matrixed run cancelled: no retries remaining, one matrixed run running", + name: "one matrixed run cancelled: retried, one matrixed run running", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -4381,23 +4272,7 @@ func TestIsRunning(t *testing.T) { }, want: false, }, { - name: "taskrun failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - TaskRun: makeFailed(trs[0]), - }, - want: true, - }, { - - name: "run failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, - CustomTask: true, - Run: makeRunFailed(runs[0]), - }, - want: true, - }, { - name: "taskrun failed: no retries remaining", + name: "taskrun failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withRetries(makeFailed(trs[0])), @@ -4405,7 +4280,7 @@ func TestIsRunning(t *testing.T) { want: false, }, { - name: "run failed: no retries remaining", + name: "run failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, CustomTask: true, @@ -4458,14 +4333,14 @@ func TestIsRunning(t *testing.T) { }, want: false, }, { - name: "taskrun cancelled: no retries remaining", + name: "taskrun cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, TaskRun: withCancelled(withRetries(makeFailed(trs[0]))), }, want: false, }, { - name: "run cancelled: no retries remaining", + name: "run cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: &v1beta1.PipelineTask{Name: "task", Retries: 1}, Run: withRunCancelled(withRunRetries(makeRunFailed(runs[0]))), @@ -4561,44 +4436,14 @@ func TestIsRunning(t *testing.T) { }, want: true, }, { - name: "matrixed taskruns failed: retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), makeFailed(trs[1])}, - }, - want: true, - }, { - name: "matrixed runs failed: retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), makeRunFailed(runs[1])}, - }, - want: true, - }, { - name: "matrixed taskruns failed: one taskrun with retries remaining", - rpt: ResolvedPipelineTask{ - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), withRetries(makeFailed(trs[1]))}, - }, - want: true, - }, { - name: "matrixed runs failed: one run with retries remaining", - rpt: ResolvedPipelineTask{ - CustomTask: true, - PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), - Runs: []*v1alpha1.Run{makeRunFailed(runs[0]), withRunRetries(makeRunFailed(runs[1]))}, - }, - want: true, - }, { - name: "matrixed taskruns failed: no retries remaining", + name: "matrixed taskruns failed: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withRetries(makeFailed(trs[0])), withRetries(makeFailed(trs[1]))}, }, want: false, }, { - name: "matrixed runs failed: no retries remaining", + name: "matrixed runs failed: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -4696,14 +4541,14 @@ func TestIsRunning(t *testing.T) { }, want: true, }, { - name: "matrixed taskruns cancelled: no retries remaining", + name: "matrixed taskruns cancelled: retried", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), withCancelled(withRetries(makeFailed(trs[1])))}, }, want: false, }, { - name: "matrixed runs cancelled: no retries remaining", + name: "matrixed runs cancelled: retried", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), @@ -4711,14 +4556,14 @@ func TestIsRunning(t *testing.T) { }, want: false, }, { - name: "one matrixed taskrun cancelled: no retries remaining, one matrixed taskrun running", + name: "one matrixed taskrun cancelled: retried, one matrixed taskrun running", rpt: ResolvedPipelineTask{ PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), makeStarted(trs[1])}, }, want: true, }, { - name: "one matrixed run cancelled: no retries remaining, one matrixed run running", + name: "one matrixed run cancelled: retried, one matrixed run running", rpt: ResolvedPipelineTask{ CustomTask: true, PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1), diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunstate.go b/pkg/reconciler/pipelinerun/resources/pipelinerunstate.go index a3b269d5fd8..7c77568d625 100644 --- a/pkg/reconciler/pipelinerun/resources/pipelinerunstate.go +++ b/pkg/reconciler/pipelinerun/resources/pipelinerunstate.go @@ -316,47 +316,6 @@ func (state PipelineRunState) getNextTasks(candidateTasks sets.String) []*Resolv } } } - tasks = append(tasks, state.getRetryableTasks(candidateTasks)...) - return tasks -} - -// getRetryableTasks returns a list of pipelinetasks which should be executed next when the pipelinerun is stopping, -// i.e. a list of failed pipelinetasks from candidateTasks which haven't exhausted their retries. Note that if a -// pipelinetask is cancelled, the retries are not exhausted - they are not retryable. -func (state PipelineRunState) getRetryableTasks(candidateTasks sets.String) []*ResolvedPipelineTask { - var tasks []*ResolvedPipelineTask - for _, t := range state { - if _, ok := candidateTasks[t.PipelineTask.Name]; ok { - var status *apis.Condition - switch { - case t.TaskRun != nil: - status = t.TaskRun.Status.GetCondition(apis.ConditionSucceeded) - case len(t.TaskRuns) != 0: - isDone := true - for _, taskRun := range t.TaskRuns { - isDone = isDone && taskRun.IsDone() - c := taskRun.Status.GetCondition(apis.ConditionSucceeded) - if c.IsFalse() { - status = c - } - } - case t.Run != nil: - status = t.Run.Status.GetCondition(apis.ConditionSucceeded) - case len(t.Runs) != 0: - isDone := true - for _, run := range t.Runs { - isDone = isDone && run.IsDone() - c := run.Status.GetCondition(apis.ConditionSucceeded) - if c.IsFalse() { - status = c - } - } - } - if status.IsFalse() && !t.isCancelled() && t.hasRemainingRetries() { - tasks = append(tasks, t) - } - } - } return tasks } @@ -416,10 +375,6 @@ func (facts *PipelineRunFacts) DAGExecutionQueue() (PipelineRunState, error) { } if !facts.IsStopping() && !facts.IsGracefullyStopped() { tasks = facts.State.getNextTasks(candidateTasks) - } else { - // when pipeline run is stopping normally or gracefully, do not schedule any new tasks and only - // wait for all running tasks to complete (including exhausting retries) and report their status - tasks = facts.State.getRetryableTasks(candidateTasks) } return tasks, nil } diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunstate_test.go b/pkg/reconciler/pipelinerun/resources/pipelinerunstate_test.go index 52c683f2126..06fd96cfd4e 100644 --- a/pkg/reconciler/pipelinerun/resources/pipelinerunstate_test.go +++ b/pkg/reconciler/pipelinerun/resources/pipelinerunstate_test.go @@ -95,7 +95,7 @@ func TestPipelineRunFacts_CheckDAGTasksDoneDone(t *testing.T) { var taskExpectedState = PipelineRunState{{ PipelineTask: &pts[4], // 2 retries needed TaskRunName: "pipelinerun-mytask1", - TaskRun: withRetries(makeFailed(trs[0])), + TaskRun: withRetries(makeStarted(trs[0])), ResolvedTaskResources: &resources.ResolvedTaskResources{ TaskSpec: &task.Spec, }, @@ -131,24 +131,6 @@ func TestPipelineRunFacts_CheckDAGTasksDoneDone(t *testing.T) { Run: makeRunFailed(runs[0]), }} - var taskFailedWithRetries = PipelineRunState{{ - PipelineTask: &pts[4], // 2 retries needed - TaskRunName: "pipelinerun-mytask1", - TaskRun: makeFailed(trs[0]), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - - var taskCancelledFailedWithRetries = PipelineRunState{{ - PipelineTask: &pts[4], // 2 retries needed - TaskRunName: "pipelinerun-mytask1", - TaskRun: withCancelled(makeFailed(trs[0])), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - tcs := []struct { name string state PipelineRunState @@ -204,16 +186,6 @@ func TestPipelineRunFacts_CheckDAGTasksDoneDone(t *testing.T) { state: runFailedState, expected: true, ptExpected: []bool{true}, - }, { - name: "run-failed-with-retries", - state: taskFailedWithRetries, - expected: false, - ptExpected: []bool{false}, - }, { - name: "run-cancelled-failed-with-retries", - state: taskCancelledFailedWithRetries, - expected: true, - ptExpected: []bool{true}, }} for _, tc := range tcs { @@ -240,7 +212,6 @@ func TestPipelineRunFacts_CheckDAGTasksDoneDone(t *testing.T) { if d := cmp.Diff(tc.ptExpected[i], isDone); d != "" { t.Errorf("Didn't get expected (ResolvedPipelineTask) isDone %s", diff.PrintWantGot(d)) } - } }) } @@ -619,15 +590,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { }, }} - var taskExpectedState = PipelineRunState{{ - PipelineTask: &pts[4], // 2 retries needed - TaskRunName: "pipelinerun-mytask1", - TaskRun: withRetries(makeFailed(trs[0])), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - var runCancelledByStatusState = PipelineRunState{{ PipelineTask: &pts[4], // 2 retries needed RunName: "pipelinerun-mytask1", @@ -678,16 +640,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { }, }} - var runExpectedState = PipelineRunState{{ - PipelineTask: &pts[4], // 2 retries needed - RunName: "pipelinerun-mytask1", - Run: withRunRetries(makeRunFailed(runs[0])), - CustomTask: true, - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - var taskCancelledByStatusStateMatrix = PipelineRunState{{ PipelineTask: &pts[20], // 2 retries needed TaskRunNames: []string{"pipelinerun-mytask1"}, @@ -733,15 +685,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { }, }} - var taskExpectedStateMatrix = PipelineRunState{{ - PipelineTask: &pts[20], // 2 retries needed - TaskRunNames: []string{"pipelinerun-mytask1"}, - TaskRuns: []*v1beta1.TaskRun{withRetries(makeFailed(trs[0]))}, - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - var runCancelledByStatusStateMatrix = PipelineRunState{{ PipelineTask: &pts[20], // 2 retries needed RunNames: []string{"pipelinerun-mytask1"}, @@ -792,16 +735,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { }, }} - var runExpectedStateMatrix = PipelineRunState{{ - PipelineTask: &pts[20], // 2 retries needed - RunNames: []string{"pipelinerun-mytask1"}, - Runs: []*v1alpha1.Run{withRunRetries(makeRunFailed(runs[0]))}, - CustomTask: true, - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - }} - tcs := []struct { name string state PipelineRunState @@ -832,11 +765,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { state: taskRetriedState, candidates: sets.NewString("mytask5"), expectedNext: []*ResolvedPipelineTask{}, - }, { - name: "tasks-retried-one-candidates", - state: taskExpectedState, - candidates: sets.NewString("mytask5"), - expectedNext: []*ResolvedPipelineTask{taskExpectedState[0]}, }, { name: "runs-cancelled-no-candidates", state: runCancelledByStatusState, @@ -862,11 +790,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { state: runRetriedState, candidates: sets.NewString("mytask5"), expectedNext: []*ResolvedPipelineTask{}, - }, { - name: "run-retried-one-candidates", - state: runExpectedState, - candidates: sets.NewString("mytask5"), - expectedNext: []*ResolvedPipelineTask{runExpectedState[0]}, }, { name: "tasks-cancelled-no-candidates-matrix", state: taskCancelledByStatusStateMatrix, @@ -892,11 +815,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { state: taskRetriedStateMatrix, candidates: sets.NewString("mytask21"), expectedNext: []*ResolvedPipelineTask{}, - }, { - name: "tasks-retried-one-candidate-matrix", - state: taskExpectedStateMatrix, - candidates: sets.NewString("mytask21"), - expectedNext: []*ResolvedPipelineTask{taskExpectedStateMatrix[0]}, }, { name: "runs-cancelled-no-candidates-matrix", state: runCancelledByStatusStateMatrix, @@ -922,11 +840,6 @@ func TestGetNextTaskWithRetries(t *testing.T) { state: runRetriedStateMatrix, candidates: sets.NewString("mytask21"), expectedNext: []*ResolvedPipelineTask{}, - }, { - name: "run-retried-one-candidates-matrix", - state: runExpectedStateMatrix, - candidates: sets.NewString("mytask21"), - expectedNext: []*ResolvedPipelineTask{runExpectedStateMatrix[0]}, }} // iterate over *state* to get from candidate and check if TaskRun or Run is there. @@ -1023,28 +936,6 @@ func TestDAGExecutionQueue(t *testing.T) { Run: makeRunFailed(runs[0]), CustomTask: true, } - failedTaskWithRetries := ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{ - Name: "failedtaskwithretries", - TaskRef: &v1beta1.TaskRef{Name: "task"}, - Retries: 1, - }, - TaskRunName: "failedtaskwithretries", - TaskRun: makeFailed(trs[0]), - ResolvedTaskResources: &resources.ResolvedTaskResources{ - TaskSpec: &task.Spec, - }, - } - failedRunWithRetries := ResolvedPipelineTask{ - PipelineTask: &v1beta1.PipelineTask{ - Name: "failedrunwithretries", - TaskRef: &v1beta1.TaskRef{Name: "task"}, - Retries: 1, - }, - RunName: "failedrunwithretries", - Run: makeRunFailed(runs[0]), - CustomTask: true, - } tcs := []struct { name string state PipelineRunState @@ -1056,7 +947,6 @@ func TestDAGExecutionQueue(t *testing.T) { state: PipelineRunState{ &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, - &failedTaskWithRetries, &failedRunWithRetries, }, }, { name: "gracefully cancelled", @@ -1064,7 +954,6 @@ func TestDAGExecutionQueue(t *testing.T) { state: PipelineRunState{ &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, - &failedTaskWithRetries, &failedRunWithRetries, }, }, { name: "gracefully stopped", @@ -1072,34 +961,19 @@ func TestDAGExecutionQueue(t *testing.T) { state: PipelineRunState{ &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, }, - }, { - name: "gracefully stopped with retryable tasks", - specStatus: v1beta1.PipelineRunSpecStatusStoppedRunFinally, - state: PipelineRunState{ - &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, - &failedTask, &failedRun, &failedTaskWithRetries, &failedRunWithRetries, - }, - want: PipelineRunState{&failedTaskWithRetries, &failedRunWithRetries}, }, { name: "running", state: PipelineRunState{ &createdTask, &createdRun, &runningTask, &runningRun, - &failedTaskWithRetries, &failedRunWithRetries, &successfulTask, &successfulRun, + &successfulTask, &successfulRun, }, - want: PipelineRunState{&createdTask, &createdRun, &failedTaskWithRetries, &failedRunWithRetries}, + want: PipelineRunState{&createdTask, &createdRun}, }, { name: "stopped", state: PipelineRunState{ &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, &failedTask, &failedRun, }, - }, { - name: "stopped with retryable tasks", - state: PipelineRunState{ - &createdTask, &createdRun, &runningTask, &runningRun, &successfulTask, &successfulRun, - &failedTask, &failedRun, &failedTaskWithRetries, &failedRunWithRetries, - }, - want: PipelineRunState{&failedTaskWithRetries, &failedRunWithRetries}, }, { name: "all tasks finished", state: PipelineRunState{&successfulTask, &successfulRun, &failedTask, &failedRun}, @@ -1612,17 +1486,6 @@ func TestPipelineRunState_GetFinalTasksAndNames(t *testing.T) { expectedFinalTasks: PipelineRunState{}, expectedFinalNames: sets.NewString(pts[1].Name), expectedTaskNames: sets.NewString(pts[0].Name), - }, { - // tasks: [ mytask1] - // finally: [mytask4] - name: "07 - DAG tasks succeeded, return retryable final tasks", - desc: "DAG task (mytask1) finished successfully - retry failed final tasks (mytask4)", - state: retryableFinalState, - DAGTasks: []v1beta1.PipelineTask{pts[0]}, - finalTasks: []v1beta1.PipelineTask{pts[3]}, - expectedFinalTasks: PipelineRunState{retryableFinalState[1]}, - expectedFinalNames: sets.NewString(pts[3].Name), - expectedTaskNames: sets.NewString(pts[0].Name), }} for _, tc := range tcs { dagGraph, err := dag.Build(v1beta1.PipelineTaskList(tc.DAGTasks), v1beta1.PipelineTaskList(tc.DAGTasks).Deps()) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index b5a7f6e8646..677b78f66c0 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -293,7 +293,9 @@ func (c *Reconciler) finishReconcileUpdateEmitEvents(ctx context.Context, tr *v1 logger := logging.FromContext(ctx) afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) - + if afterCondition.IsFalse() && !tr.IsCancelled() && tr.IsRetriable() { + retryTaskRun(tr, afterCondition.Message) + } // Send k8s events and cloud events (when configured) events.Emit(ctx, beforeCondition, afterCondition, tr) @@ -445,7 +447,8 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1beta1.TaskRun) (*v1beta1 // error but it does not sync updates back to etcd. It does not emit events. // `reconcile` consumes spec and resources returned by `prepare` func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun, rtr *resources.ResolvedTaskResources) error { - defer c.durationAndCountMetrics(ctx, tr) + newTr := tr.DeepCopy() + defer c.durationAndCountMetrics(ctx, newTr) logger := logging.FromContext(ctx) recorder := controller.GetEventRecorder(ctx) var err error @@ -477,7 +480,7 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun, rtr *re } for index := range pos { po := pos[index] - if metav1.IsControlledBy(po, tr) && !podconvert.DidTaskRunFail(po) { + if metav1.IsControlledBy(po, tr) && !podconvert.DidTaskRunFail(po) && !podconvert.IsPodArchived(po, &tr.Status) { pod = po } } @@ -968,3 +971,16 @@ func isResourceQuotaConflictError(err error) bool { } return k8ErrStatus.Details != nil && k8ErrStatus.Details.Kind == "resourcequotas" } + +// retryTaskRun archives taskRun.Status to taskRun.Status.RetriesStatus, and set +// taskRun status to Unknown with Reason v1beta1.TaskRunReasonToBeRetried. +func retryTaskRun(tr *v1beta1.TaskRun, message string) { + newStatus := tr.Status.DeepCopy() + newStatus.RetriesStatus = nil + tr.Status.RetriesStatus = append(tr.Status.RetriesStatus, *newStatus) + tr.Status.StartTime = nil + tr.Status.CompletionTime = nil + tr.Status.PodName = "" + taskRunCondSet := apis.NewBatchConditionSet() + taskRunCondSet.Manage(&tr.Status).MarkUnknown(apis.ConditionSucceeded, v1beta1.TaskRunReasonToBeRetried.String(), message) +} diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index 53b9b4bb3e5..58c38047da3 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -93,7 +93,9 @@ var ( PRImage: "override-with-pr:latest", ImageDigestExporterImage: "override-with-imagedigest-exporter-image:latest", } - now = time.Date(2022, time.January, 1, 0, 0, 0, 0, time.UTC) + + now = time.Date(2022, time.January, 1, 0, 0, 0, 0, time.UTC) + ignoreLastTransitionTime = cmpopts.IgnoreFields(apis.Condition{}, "LastTransitionTime.Inner.Time") // Pods are created with a random 5-character suffix that we want to // ignore in our diffs. @@ -102,15 +104,16 @@ var ( }, cmp.Comparer(func(name1, name2 string) bool { return name1[:len(name1)-5] == name2[:len(name2)-5] })) + ignoreEnvVarOrdering = cmpopts.SortSlices(func(x, y corev1.EnvVar) bool { return x.Name < y.Name }) + resourceQuantityCmp = cmp.Comparer(func(x, y resource.Quantity) bool { return x.Cmp(y) == 0 }) - ignoreEnvVarOrdering = cmpopts.SortSlices(func(x, y corev1.EnvVar) bool { return x.Name < y.Name }) - volumeSort = cmpopts.SortSlices(func(i, j corev1.Volume) bool { return i.Name < j.Name }) - volumeMountSort = cmpopts.SortSlices(func(i, j corev1.VolumeMount) bool { return i.Name < j.Name }) - cloudEventTarget1 = "https://foo" - cloudEventTarget2 = "https://bar" + volumeSort = cmpopts.SortSlices(func(i, j corev1.Volume) bool { return i.Name < j.Name }) + volumeMountSort = cmpopts.SortSlices(func(i, j corev1.VolumeMount) bool { return i.Name < j.Name }) + cloudEventTarget1 = "https://foo" + cloudEventTarget2 = "https://bar" simpleStep = v1beta1.Step{ Name: "simple-step", @@ -1706,7 +1709,335 @@ spec: } }) } +} +func TestReconcileRetry(t *testing.T) { + var ( + toBeCanceledTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-canceled + namespace: foo +spec: + retries: 1 + status: TaskRunCancelled + taskRef: + name: test-task +status: + startTime: "2021-12-31T23:59:59Z" + conditions: + - reason: Running + status: Unknown + type: Succeeded + message: "TaskRun \"test-taskrun-run-retry-canceled\" was cancelled. " +`) + canceledTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-canceled + namespace: foo +spec: + retries: 1 + status: TaskRunCancelled + taskRef: + name: test-task +status: + startTime: "2021-12-31T23:59:59Z" + completionTime: "2022-01-01T00:00:00Z" + conditions: + - reason: TaskRunCancelled + status: "False" + type: Succeeded + message: "TaskRun \"test-taskrun-run-retry-canceled\" was cancelled. " +`) + toBeTimedOutTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-timedout + namespace: foo +spec: + retries: 1 + timeout: "10s" + taskRef: + name: test-task +status: + startTime: "2021-12-31T00:00:00Z" + conditions: + - reason: Running + status: Unknown + type: Succeeded + message: TaskRun "test-taskrun-run-retry-timedout" failed to finish within "10s" +`) + timedOutTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-timedout + namespace: foo +spec: + retries: 1 + timeout: "10s" + taskRef: + name: test-task +status: + conditions: + - reason: ToBeRetried + status: Unknown + type: Succeeded + message: TaskRun "test-taskrun-run-retry-timedout" failed to finish within "10s" + retriesStatus: + - conditions: + - reason: "TaskRunTimeout" + status: "False" + type: Succeeded + message: TaskRun "test-taskrun-run-retry-timedout" failed to finish within "10s" + startTime: "2021-12-31T00:00:00Z" + completionTime: "2022-01-01T00:00:00Z" + `) + toFailOnPodFailureTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-pod-failure + namespace: foo +spec: + retries: 1 + taskRef: + name: test-task +status: + startTime: "2021-12-31T23:59:59Z" + podName: test-taskrun-run-retry-pod-failure-pod + steps: + - container: step-unamed-0 + name: unamed-0 + waiting: + reason: "ImagePullBackOff" +`) + failedOnPodFailureTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-pod-failure + namespace: foo +spec: + retries: 1 + taskRef: + name: test-task +status: + conditions: + - reason: ToBeRetried + status: Unknown + type: Succeeded + message: "The step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\"" + steps: + - container: step-unamed-0 + name: unamed-0 + terminated: + exitCode: 1 + finishedAt: "2022-01-01T00:00:00Z" + reason: "TaskRunImagePullFailed" + retriesStatus: + - conditions: + - reason: "TaskRunImagePullFailed" + status: "False" + type: Succeeded + message: "The step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\"" + startTime: "2021-12-31T23:59:59Z" + completionTime: "2022-01-01T00:00:00Z" + podName: test-taskrun-run-retry-pod-failure-pod + steps: + - container: step-unamed-0 + name: unamed-0 + terminated: + exitCode: 1 + finishedAt: "2022-01-01T00:00:00Z" + reason: "TaskRunImagePullFailed" +`) + failedPod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-taskrun-run-retry-pod-failure-pod"}, + Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: "False", + Reason: "PodFailed", + }}}, + } + toFailOnPrepareTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-prepare-failure + namespace: foo +spec: + retries: 1 + taskRef: + name: test-task +status: + startTime: "2021-12-31T23:59:59Z" + conditions: + - reason: Running + status: Unknown + type: Succeeded + lastTransitionTime: "2022-01-01T00:00:00Z" +`) + failedOnPrepareTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-run-retry-prepare-failure + namespace: foo +spec: + retries: 1 + taskRef: + name: test-task +status: + conditions: + - reason: ToBeRetried + status: Unknown + type: Succeeded + message: "error when listing tasks for taskRun test-taskrun-run-retry-prepare-failure: tasks.tekton.dev \"test-task\" not found" + retriesStatus: + - conditions: + - reason: TaskRunResolutionFailed + status: "False" + type: "Succeeded" + message: "error when listing tasks for taskRun test-taskrun-run-retry-prepare-failure: tasks.tekton.dev \"test-task\" not found" + startTime: "2021-12-31T23:59:59Z" + completionTime: "2022-01-01T00:00:00Z" +`) + prepareError = fmt.Errorf("1 error occurred:\n\t* error when listing tasks for taskRun test-taskrun-run-retry-prepare-failure: tasks.tekton.dev \"test-task\" not found") + toFailOnReconcileFailureTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-results-type-mismatched + namespace: foo +spec: + retries: 1 + taskRef: + name: test-results-task +status: + startTime: "2021-12-31T23:59:59Z" + taskResults: + - name: aResult + type: string + value: aResultValue +`) + failedOnReconcileFailureTaskRun = parse.MustParseV1beta1TaskRun(t, ` +metadata: + name: test-taskrun-results-type-mismatched + namespace: foo +spec: + retries: 1 + taskRef: + name: test-results-task +status: + taskResults: + - name: aResult + type: string + value: aResultValue + conditions: + - reason: ToBeRetried + status: Unknown + type: Succeeded + message: "missmatched Types for these results, map[aResult:[array]]" + sideCars: + retriesStatus: + - conditions: + - reason: TaskRunValidationFailed + status: "False" + type: "Succeeded" + message: "missmatched Types for these results, map[aResult:[array]]" + startTime: "2021-12-31T23:59:59Z" + completionTime: "2022-01-01T00:00:00Z" + podName: "test-taskrun-results-type-mismatched-pod" + taskResults: + - name: aResult + type: string + value: aResultValue +`) + reconciliatonError = fmt.Errorf("1 error occurred:\n\t* missmatched Types for these results, map[aResult:[array]]") + ) + + for _, tc := range []struct { + name string + task *v1beta1.Task + tr *v1beta1.TaskRun + expectedTr *v1beta1.TaskRun + pod *corev1.Pod + reconcileError error + expectCompletionTimeSet bool + }{{ + name: "No Retry on Cancellation", + task: simpleTask, + tr: toBeCanceledTaskRun, + expectedTr: canceledTaskRun, + expectCompletionTimeSet: true, + }, { + name: "Retry on TimedOut", + task: simpleTask, + tr: toBeTimedOutTaskRun, + expectedTr: timedOutTaskRun, + }, { + name: "Retry on TaskRun Pod Failure", + task: simpleTask, + tr: toFailOnPodFailureTaskRun, + expectedTr: failedOnPodFailureTaskRun, + pod: failedPod, + }, { + name: "Retry on TaskRun Prepare Failure", + tr: toFailOnPrepareTaskRun, + expectedTr: failedOnPrepareTaskRun, + reconcileError: prepareError, + }, { + name: "Retry on TaskRun Reconciliation Failure", + task: resultsTask, + tr: toFailOnReconcileFailureTaskRun, + expectedTr: failedOnReconcileFailureTaskRun, + reconcileError: reconciliatonError, + }} { + t.Run(tc.name, func(t *testing.T) { + d := test.Data{ + TaskRuns: []*v1beta1.TaskRun{tc.tr}, + ConfigMaps: []*corev1.ConfigMap{{ + ObjectMeta: metav1.ObjectMeta{Namespace: system.Namespace(), Name: config.GetFeatureFlagsConfigName()}, + Data: map[string]string{ + "enable-api-fields": config.AlphaAPIFields, + }, + }}, + } + if tc.task != nil { + d.Tasks = []*v1beta1.Task{tc.task} + } + if tc.pod != nil { + d.Pods = []*corev1.Pod{tc.pod} + } + testAssets, cancel := getTaskRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + createServiceAccount(t, testAssets, "default", tc.tr.Namespace) + + err := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.tr)) + if tc.reconcileError != nil { + if err == nil { + t.Error("expect error in TaskRun reconciliation, but got nil") + } else { + if d := cmp.Diff(strings.TrimSuffix(err.Error(), "\n\n"), tc.reconcileError.Error()); d != "" { + t.Errorf("Expected: %v, but Got: %v", tc.reconcileError, err) + } + } + } else if err != nil { + if ok, _ := controller.IsRequeueKey(err); !ok && tc.reconcileError == nil { + t.Errorf("unexpected error in TaskRun reconciliation: %v", err) + } + } + + reconciledTaskRun, err := clients.Pipeline.TektonV1beta1().TaskRuns("foo").Get(testAssets.Ctx, tc.tr.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err) + } + + ignoreCompletionTime := cmpopts.IgnoreFields(v1beta1.TaskRunStatusFields{}, "CompletionTime") + ignoreObjectMeta := cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Labels", "ResourceVersion", "Annotations") + ignoreStatusTaskSpec := cmpopts.IgnoreFields(v1beta1.TaskRunStatusFields{}, "TaskSpec") + ignoreTaskRunStatusFields := cmpopts.IgnoreFields(v1beta1.TaskRunStatusFields{}, "Steps", "Sidecars") + if d := cmp.Diff(reconciledTaskRun, tc.expectedTr, ignoreLastTransitionTime, ignoreObjectMeta, ignoreCompletionTime, ignoreStatusTaskSpec, ignoreTaskRunStatusFields); d != "" { + t.Errorf("Didn't get expected TaskRun: %v", diff.PrintWantGot(d)) + } + + if !tc.expectCompletionTimeSet && reconciledTaskRun.Status.CompletionTime != nil { + t.Error("Expect completion time to be nil") + } + if tc.expectCompletionTimeSet && reconciledTaskRun.Status.CompletionTime == nil { + t.Error("Didn't expect completion time to be nil") + } + }) + } } func TestReconcileGetTaskError(t *testing.T) {