The cancellation of taskruns is now done through the entrypoint binary

through a new flag called 'stop_on_cancel'. This removes the need to delete pods in order to cancel a taskrun, which makes it possible to examine the logs of pods that belong to cancelled taskruns. Part of the work on issue #3238. Signed-off-by: chengjoey <[email protected]>
tektoncd · Aug 29, 2023 · cc78142 · cc78142
1 parent 2299b15
commit cc78142
Show file tree

Hide file tree

Showing 19 changed files with 745 additions and 51 deletions.
diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -17,6 +17,7 @@ limitations under the License.
 package main
 
 import (
+	"context"
 	"encoding/json"
 	"errors"
 	"flag"
@@ -67,7 +68,7 @@ const (
 
 func checkForBreakpointOnFailure(e entrypoint.Entrypointer, breakpointExitPostFile string) {
 	if e.BreakpointOnFailure {
-		if waitErr := e.Waiter.Wait(breakpointExitPostFile, false, false); waitErr != nil {
+		if waitErr := e.Waiter.Wait(context.Background(), breakpointExitPostFile, false, false); waitErr != nil {
 			log.Println("error occurred while waiting for " + breakpointExitPostFile + " : " + waitErr.Error())
 		}
 		// get exitcode from .breakpointexit
@@ -181,6 +182,15 @@ func main() {
 		case termination.MessageLengthError:
 			log.Print(err.Error())
 			os.Exit(1)
+		case entrypoint.ContextError:
+			if errors.Is(err, entrypoint.ErrContextCanceled) {
+				log.Print("Step was cancelled")
+				// exit with the SIGKILL signal number so a cancelled step can be told apart from a normal exit, just like kill -9 PID
+				os.Exit(int(syscall.SIGKILL))
+			} else {
+				log.Print(err.Error())
+				os.Exit(1)
+			}
 		case *exec.ExitError:
 			// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
 			// This works on both Unix and Windows. Although

diff --git a/cmd/entrypoint/runner.go b/cmd/entrypoint/runner.go
@@ -118,7 +118,10 @@ func (rr *realRunner) Run(ctx context.Context, args ...string) error {
 	// Start defined command
 	if err := cmd.Start(); err != nil {
 		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
-			return context.DeadlineExceeded
+			return entrypoint.ErrContextDeadlineExceeded
+		}
+		if errors.Is(ctx.Err(), context.Canceled) {
+			return entrypoint.ErrContextCanceled
 		}
 		return err
 	}
@@ -134,9 +137,15 @@ func (rr *realRunner) Run(ctx context.Context, args ...string) error {
 	}()
 
 	// Wait for command to exit
+	// As noted in os/exec (https://github.com/golang/go/blob/ee522e2cdad04a43bc9374776483b6249eb97ec9/src/os/exec/exec.go#L897-L906),
+	// cmd.Wait prefers the process error over the context error,
+	// but we want to return the context error instead.
 	if err := cmd.Wait(); err != nil {
 		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
-			return context.DeadlineExceeded
+			return entrypoint.ErrContextDeadlineExceeded
+		}
+		if errors.Is(ctx.Err(), context.Canceled) {
+			return entrypoint.ErrContextCanceled
 		}
 		return err
 	}

diff --git a/cmd/entrypoint/runner_test.go b/cmd/entrypoint/runner_test.go
@@ -22,12 +22,15 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"strings"
 	"syscall"
 	"testing"
 	"time"
+
+	"github.com/tektoncd/pipeline/pkg/entrypoint"
 )
 
 // TestRealRunnerSignalForwarding will artificially put an interrupt signal (SIGINT) in the rr.signals chan.
@@ -183,10 +186,52 @@ func TestRealRunnerTimeout(t *testing.T) {
 	defer cancel()
 
 	if err := rr.Run(ctx, "sleep", "0.01"); err != nil {
-		if !errors.Is(err, context.DeadlineExceeded) {
+		if !errors.Is(err, entrypoint.ErrContextDeadlineExceeded) {
 			t.Fatalf("unexpected error received: %v", err)
 		}
 	} else {
 		t.Fatalf("step didn't timeout")
 	}
 }
+
+func TestRealRunnerCancel(t *testing.T) {
+	testCases := []struct {
+		name    string
+		timeout time.Duration
+		wantErr error
+	}{
+		{
+			name:    "cancel before cmd wait",
+			timeout: 0,
+			wantErr: entrypoint.ErrContextCanceled,
+		},
+		{
+			name:    "cancel on cmd wait",
+			timeout: time.Second * time.Duration(rand.Intn(3)),
+			wantErr: entrypoint.ErrContextCanceled,
+		},
+		{
+			name:    "cancel after cmd wait",
+			timeout: time.Second * 4,
+			wantErr: nil,
+		},
+	}
+	for _, tc := range testCases {
+		rr := realRunner{}
+		ctx, cancel := context.WithCancel(context.Background())
+		go func() {
+			time.Sleep(tc.timeout)
+			cancel()
+		}()
+		err := rr.Run(ctx, "sleep", "3")
+		if tc.wantErr != nil {
+			if !errors.Is(err, tc.wantErr) {
+				t.Fatalf("unexpected error received: %v", err)
+			}
+		} else {
+			if err != nil {
+				t.Fatalf("unexpected error received: %v", err)
+			}
+		}
+	}
+}
diff --git a/cmd/entrypoint/waiter.go b/cmd/entrypoint/waiter.go
@@ -17,6 +17,8 @@ limitations under the License.
 package main
 
 import (
+	"context"
+	"errors"
 	"fmt"
 	"os"
 	"time"
@@ -47,11 +49,22 @@ func (rw *realWaiter) setWaitPollingInterval(pollingInterval time.Duration) *rea
 //
 // If a file of the same name with a ".err" extension exists then this Wait
 // will end with a skipError.
-func (rw *realWaiter) Wait(file string, expectContent bool, breakpointOnFailure bool) error {
+func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool, breakpointOnFailure bool) error {
 	if file == "" {
 		return nil
 	}
-	for ; ; time.Sleep(rw.waitPollingInterval) {
+	for {
+		select {
+		case <-ctx.Done():
+			if errors.Is(ctx.Err(), context.Canceled) {
+				return entrypoint.ErrContextCanceled
+			}
+			if errors.Is(ctx.Err(), context.DeadlineExceeded) {
+				return entrypoint.ErrContextDeadlineExceeded
+			}
+			return nil
+		case <-time.After(rw.waitPollingInterval):
+		}
 		if info, err := os.Stat(file); err == nil {
 			if !expectContent || info.Size() > 0 {
 				return nil