diff --git a/docs/events.md b/docs/events.md new file mode 100644 index 00000000000..add381754cc --- /dev/null +++ b/docs/events.md @@ -0,0 +1,39 @@ + +# Events + +Tekton runtime resources, specifically `TaskRuns` and `PipelineRuns`, +emit events when they are executed, so that users can monitor their lifecycle +and react to it. Tekton emits [kubernetes events](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#event-v1-core), that can be retrieve from the resource via +`kubectl describe [resource]`. + +No events are emitted for `Conditions` today (https://github.com/tektoncd/pipeline/issues/2461). + +## TaskRuns + +`TaskRun` events are generated for the following `Reasons`: +- `Started`: this is triggered the first time the `TaskRun` is picked by the + reconciler from its work queue, so it only happens if web-hook validation was + successful. Note that this event does not imply that a step started executing, + as several conditions must be met first: + - task and bound resource validation must be successful + - attached conditions must run successfully + - the `Pod` associated to the `TaskRun` must be successfully scheduled +- `Succeeded`: this is triggered once all steps in the `TaskRun` are executed + successfully, including post-steps injected by Tekton. +- `Failed`: this is triggered if the `TaskRun` is completed, but not successfully. + Causes of failure may be: one the steps failed, the `TaskRun` was cancelled or + the `TaskRun` timed out. + +## PipelineRuns + +`PipelineRun` events are generated for the following `Reasons`: +- `Succeeded`: this is triggered once all `Tasks` reachable via the DAG are + executed successfully. +- `Failed`: this is triggered if the `PipelineRun` is completed, but not + successfully. Causes of failure may be: one the `Tasks` failed or the + `PipelineRun` was cancelled. diff --git a/docs/pipelineruns.md b/docs/pipelineruns.md index 05b2a69469f..89d09fb4944 100644 --- a/docs/pipelineruns.md +++ b/docs/pipelineruns.md @@ -18,6 +18,8 @@ weight: 4 - [Specifying `LimitRange` values](#specifying-limitrange-values) - [Configuring a failure timeout](#configuring-a-failure-timeout) - [Cancelling a `PipelineRun`](#cancelling-a-pipelinerun) +- [Events](events.md#pipelineruns) + ## Overview diff --git a/docs/taskruns.md b/docs/taskruns.md index ce27a5b457d..6d65114a9bf 100644 --- a/docs/taskruns.md +++ b/docs/taskruns.md @@ -22,6 +22,7 @@ weight: 2 - [Monitoring `Steps`](#monitoring-steps) - [Monitoring `Results`](#monitoring-results) - [Cancelling a `TaskRun`](#cancelling-a-taskrun) +- [Events](events.md#taskruns) - [Code examples](#code-examples) - [Example `TaskRun` with a referenced `Task`](#example-taskrun-with-a-referenced-task) - [Example `TaskRun` with an embedded `Task`](#example-taskrun-with-an-embedded-task) diff --git a/pkg/reconciler/event.go b/pkg/reconciler/event.go index e35560b88ef..840fb10844b 100644 --- a/pkg/reconciler/event.go +++ b/pkg/reconciler/event.go @@ -27,10 +27,11 @@ const ( EventReasonSucceded = "Succeeded" // EventReasonFailed is the reason set for events about unsuccessful completion of TaskRuns / PipelineRuns EventReasonFailed = "Failed" + // EventReasonStarted is the reason set for events about the start of TaskRuns / PipelineRuns + EventReasonStarted = "Started" ) -// EmitEvent emits success or failed event for object -// if afterCondition is different from beforeCondition +// EmitEvent emits an event for object if afterCondition is different from beforeCondition func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCondition *apis.Condition, object runtime.Object) { if beforeCondition != afterCondition && afterCondition != nil { // Create events when the obj result is in. @@ -38,7 +39,11 @@ func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCon case corev1.ConditionTrue: c.Event(object, corev1.EventTypeNormal, EventReasonSucceded, afterCondition.Message) case corev1.ConditionUnknown: - c.Event(object, corev1.EventTypeNormal, afterCondition.Reason, afterCondition.Message) + if beforeCondition == nil { + c.Event(object, corev1.EventTypeNormal, EventReasonStarted, "") + } else { + c.Event(object, corev1.EventTypeNormal, afterCondition.Reason, afterCondition.Message) + } case corev1.ConditionFalse: c.Event(object, corev1.EventTypeWarning, EventReasonFailed, afterCondition.Message) } diff --git a/pkg/reconciler/event_test.go b/pkg/reconciler/event_test.go index f6c464c23c7..091492acaa0 100644 --- a/pkg/reconciler/event_test.go +++ b/pkg/reconciler/event_test.go @@ -80,6 +80,14 @@ func TestEmitEvent(t *testing.T) { Status: corev1.ConditionTrue, }, expectEvent: true, + }, { + name: "nil to unknown", + before: nil, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + expectEvent: true, }} for _, ts := range testcases { diff --git a/pkg/reconciler/taskrun/cancel.go b/pkg/reconciler/taskrun/cancel.go deleted file mode 100644 index 8eff915d7da..00000000000 --- a/pkg/reconciler/taskrun/cancel.go +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright 2019 The Tekton Authors - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package taskrun - -import ( - "fmt" - "time" - - "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1" - podconvert "github.com/tektoncd/pipeline/pkg/pod" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "knative.dev/pkg/apis" -) - -type logger interface { - Warn(args ...interface{}) - Warnf(template string, args ...interface{}) - Infof(template string, args ...interface{}) -} - -func killTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, - logger logger, reason, message string) error { - - logger.Warn("stopping task run %q because of %q", tr.Name, reason) - tr.Status.SetCondition(&apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionFalse, - Reason: reason, - Message: message, - }) - - // update tr completed time - tr.Status.CompletionTime = &metav1.Time{Time: time.Now()} - - if tr.Status.PodName == "" { - logger.Warnf("task run %q has no pod running yet", tr.Name) - return nil - } - - // tr.Status.PodName will be empty if the pod was never successfully created. This condition - // can be reached, for example, by the pod never being schedulable due to limits imposed by - // a namespace's ResourceQuota. - err := clientSet.CoreV1().Pods(tr.Namespace).Delete(tr.Status.PodName, &metav1.DeleteOptions{}) - if err != nil && !errors.IsNotFound(err) { - logger.Warnf("Failed to terminate pod: %v", err) - return err - } - return nil -} - -// cancelTaskRun marks the TaskRun as cancelled and delete pods linked to it. -func cancelTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error { - message := fmt.Sprintf("TaskRun %q was cancelled", tr.Name) - return killTaskRun(tr, clientSet, logger, "TaskRunCancelled", message) -} - -func timeoutTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error { - message := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, tr.Spec.Timeout.Duration.String()) - return killTaskRun(tr, clientSet, logger, podconvert.ReasonTimedOut, message) -} diff --git a/pkg/reconciler/taskrun/controller.go b/pkg/reconciler/taskrun/controller.go index 4642b801947..193cc8a3b29 100644 --- a/pkg/reconciler/taskrun/controller.go +++ b/pkg/reconciler/taskrun/controller.go @@ -44,6 +44,7 @@ const ( resyncPeriod = 10 * time.Hour ) +// NewController instantiates a new controller.Impl from knative.dev/pkg/controller func NewController(images pipeline.Images) func(context.Context, configmap.Watcher) *controller.Impl { return func(ctx context.Context, cmw configmap.Watcher) *controller.Impl { logger := logging.FromContext(ctx) diff --git a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go index 04fde4fbc85..694a3ebe2fc 100644 --- a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go +++ b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go @@ -66,8 +66,7 @@ func cloudEventDeliveryFromTargets(targets []string) []v1alpha1.CloudEventDelive } // SendCloudEvents is used by the TaskRun controller to send cloud events once -// the TaskRun is complete. `tr` is used to obtain the list of targets but also -// to construct the body of the +// the TaskRun is complete. `tr` is used to obtain the list of targets func SendCloudEvents(tr *v1alpha1.TaskRun, ceclient CEClient, logger *zap.SugaredLogger) error { logger = logger.With(zap.String("taskrun", tr.Name)) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index e3f109410ff..a066680a804 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -103,11 +103,15 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { // If the TaskRun is just starting, this will also set the starttime, // from which the timeout will immediately begin counting down. - tr.Status.InitializeConditions() - // In case node time was not synchronized, when controller has been scheduled to other nodes. - if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { - c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) - tr.Status.StartTime = &tr.CreationTimestamp + if !tr.HasStarted() { + tr.Status.InitializeConditions() + // In case node time was not synchronized, when controller has been scheduled to other nodes. + if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { + c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) + tr.Status.StartTime = &tr.CreationTimestamp + } + // Run asnyc startup hooks + go c.preRunAsyncHook(ctx, tr) } // If the TaskRun is complete, run some post run fixtures when applicable @@ -161,36 +165,20 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { // If the TaskRun is cancelled, kill resources and update status if tr.IsCancelled() { before := tr.Status.GetCondition(apis.ConditionSucceeded) -<<<<<<< HEAD message := fmt.Sprintf("TaskRun %q was cancelled", tr.Name) err := c.failTaskRun(tr, v1beta1.TaskRunReasonCancelled, message) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + go c.postRunAsyncHook(ctx, tr, before) return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() -======= - err := cancelTaskRun(tr, c.KubeClientSet, c.Logger) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) - return err ->>>>>>> Consolidate cancel and timeout logic } // Check if the TaskRun has timed out; if it is, this will set its status // accordingly. if tr.HasTimedOut() { before := tr.Status.GetCondition(apis.ConditionSucceeded) -<<<<<<< HEAD message := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, tr.GetTimeout()) err := c.failTaskRun(tr, podconvert.ReasonTimedOut, message) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + go c.postRunAsyncHook(ctx, tr, before) return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() -======= - err := timeoutTaskRun(tr, c.KubeClientSet, c.Logger) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) - return err ->>>>>>> Consolidate cancel and timeout logic } // prepare fetches all required resources, validates them together with the @@ -215,13 +203,42 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { c.Logger.Errorf("Reconcile error: %v", err.Error()) } - // Emit events (only when ConditionSucceeded was changed) + // Run post run hook (Emit events) (only when ConditionSucceeded was changed) after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + // If after is different from before and status is not Unknown, the taskrun + // has completed its work - except for post-run tasks like emitting events, + // recording metrics, sending cloud events. + // Once tr.isDone becomes true, even when this key is queued, `reconcile` + // won't be invoked so we won't pass through here again + if tr.IsDone() && after != before { + go c.postRunAsyncHook(ctx, tr, before) + } else { + reconciler.EmitEvent(c.Recorder, before, after, tr) + } return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() } +// Run any async logic that may be required at start-up time. This method is used +// to emit events, notifications or any other async operation +func (c *Reconciler) preRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun) { + c.Logger.Infof("preRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, nil, afterCondition, tr) +} + +// Run any async logic that may be required once the tr is successfully reconciled +// This method is used to emit events, notifications or any other async operation +func (c *Reconciler) postRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun, beforeCondition *apis.Condition) { + c.Logger.Infof("postRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, beforeCondition, afterCondition, tr) +} + // `prepare` fetches resources the taskrun depends on, runs validation and convertion // It may report errors back to Reconcile, it updates the taskrun status in case of // error but it does not sync updates back to etcd. It does not emit events. @@ -237,10 +254,6 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1alpha1.TaskRun) (*v1alph // and may not have had all of the assumed default specified. tr.SetDefaults(contexts.WithUpgradeViaDefaulting(ctx)) - if tr.Spec.Timeout == nil { - tr.Spec.Timeout = &metav1.Duration{Duration: config.DefaultTimeoutMinutes * time.Minute} - } - if err := tr.ConvertTo(ctx, &v1beta1.TaskRun{}); err != nil { if ce, ok := err.(*v1beta1.CannotConvertError); ok { tr.Status.MarkResourceNotConvertible(ce) @@ -406,7 +419,6 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun, } c.Logger.Infof("Successfully reconciled taskrun %s/%s with status: %#v", tr.Name, tr.Namespace, tr.Status.GetCondition(apis.ConditionSucceeded)) - return nil }