diff --git a/connectivity/check/action.go b/connectivity/check/action.go index c2c7aff0df..53d9d11907 100644 --- a/connectivity/check/action.go +++ b/connectivity/check/action.go @@ -12,6 +12,7 @@ import ( "io" "math" "net" + "reflect" "regexp" "strconv" "strings" @@ -204,8 +205,12 @@ func (a *Action) Run(f func(*Action)) { // TODO(timo): printFlows is a misnomer, this function actually prints // the verdict annotated over the list of flows. if a.test.ctx.PrintFlows() || a.failed { - a.printFlows(a.Source()) - a.printFlows(a.Destination()) + if a.Source() != nil && !reflect.ValueOf(a.Source()).IsNil() { + a.printFlows(a.Source()) + } + if a.Destination() != nil && !reflect.ValueOf(a.Destination()).IsNil() { + a.printFlows(a.Destination()) + } } if a.failed { if a.test.ctx.params.PauseOnFail { diff --git a/connectivity/check/utils.go b/connectivity/check/utils.go index 791c802540..22317b99bb 100644 --- a/connectivity/check/utils.go +++ b/connectivity/check/utils.go @@ -13,6 +13,7 @@ const ( IPFamilyAny IPFamily = iota IPFamilyV4 IPFamilyV6 + IPFamilyNone ) func (f IPFamily) String() string { @@ -23,6 +24,8 @@ func (f IPFamily) String() string { return "ipv4" case IPFamilyV6: return "ipv6" + case IPFamilyNone: + return "" } return "undefined" } diff --git a/connectivity/suite.go b/connectivity/suite.go index 34d2bd6e5f..c25688c200 100644 --- a/connectivity/suite.go +++ b/connectivity/suite.go @@ -1063,5 +1063,9 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch return err } + if ct.Params().IncludeUnsafeTests { + ct.NewTest("check-log-errors").WithScenarios(tests.NoErrorsInLogs()) + } + return ct.Run(ctx) } diff --git a/connectivity/tests/errors.go b/connectivity/tests/errors.go new file mode 100644 index 0000000000..9fbcb87796 --- /dev/null +++ b/connectivity/tests/errors.go @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Cilium + +package tests + +import ( + "context" + "strings" + "time" + + "github.com/cilium/cilium-cli/connectivity/check" +) + +// NoErrorsInLogs checks whether there are no error messages in cilium-agent +// logs. The error messages are defined in badLogMsgsWithExceptions, which key +// is an error message, while values is a list of ignored messages. +func NoErrorsInLogs() check.Scenario { + return &noErrorsInLogs{} +} + +type noErrorsInLogs struct{} + +func (n *noErrorsInLogs) Name() string { + return "no-errors-in-logs" +} + +func (n *noErrorsInLogs) Run(ctx context.Context, t *check.Test) { + var since time.Time + ct := t.Context() + + for _, pod := range ct.CiliumPods() { + pod := pod + logs, err := pod.K8sClient.CiliumLogs(ctx, pod.Pod.Namespace, pod.Pod.Name, since, nil) + if err != nil { + t.Fatalf("Error reading Cilium logs: %s", err) + } + t.NewAction(n, pod.Pod.Name, nil, nil, check.IPFamilyNone).Run(func(a *check.Action) { + checkErrorsInLogs(logs, a) + }) + } + +} + +func checkErrorsInLogs(logs string, a *check.Action) { + uniqueFailures := make(map[string]int) + for _, msg := range strings.Split(logs, "\n") { + for fail, ignoreMsgs := range errorMsgsWithExceptions { + if strings.Contains(msg, fail) { + ok := false + for _, ignore := range ignoreMsgs { + if strings.Contains(msg, ignore) { + ok = true + break + } + } + if !ok { + count, _ := uniqueFailures[msg] + uniqueFailures[msg] = count + 1 + } + } + } + } + if len(uniqueFailures) > 0 { + failures := make([]string, 0, len(uniqueFailures)) + for f, c := range uniqueFailures { + failures = append(failures, f) + + a.Logf("Found %q in logs %d times\n", f, c) + } + failureMsgs := strings.Join(failures, "\n") + a.Failf("Found %d logs matching list of errors that must be investigated:\n%s", len(uniqueFailures), failureMsgs) + } +} + +const ( + // Logs messages that should not be in the cilium logs + panicMessage = "panic:" + deadLockHeader = "POTENTIAL DEADLOCK:" // from github.com/sasha-s/go-deadlock/deadlock.go:header + segmentationFault = "segmentation fault" // from https://github.com/cilium/cilium/issues/3233 + NACKreceived = "NACK received for version" // from https://github.com/cilium/cilium/issues/4003 + RunInitFailed = "JoinEP: " // from https://github.com/cilium/cilium/pull/5052 + sizeMismatch = "size mismatch for BPF map" // from https://github.com/cilium/cilium/issues/7851 + emptyBPFInitArg = "empty argument passed to bpf/init.sh" // from https://github.com/cilium/cilium/issues/10228 + RemovingMapMsg = "Removing map to allow for property upgrade" // from https://github.com/cilium/cilium/pull/10626 + logBufferMessage = "Log buffer too small to dump verifier log" // from https://github.com/cilium/cilium/issues/10517 + ClangErrorsMsg = " errors generated." // from https://github.com/cilium/cilium/issues/10857 + ClangErrorMsg = "1 error generated." // from https://github.com/cilium/cilium/issues/10857 + symbolSubstitution = "Skipping symbol substitution" // + uninitializedRegen = "Uninitialized regeneration level" // from https://github.com/cilium/cilium/pull/10949 + unstableStat = "BUG: stat() has unstable behavior" // from https://github.com/cilium/cilium/pull/11028 + removeTransientRule = "Unable to process chain CILIUM_TRANSIENT_FORWARD with ip" // from https://github.com/cilium/cilium/issues/11276 + missingIptablesWait = "Missing iptables wait arg (-w):" + localIDRestoreFail = "Could not restore all CIDR identities" // from https://github.com/cilium/cilium/pull/19556 + routerIPMismatch = "Mismatch of router IPs found during restoration" + emptyIPNodeIDAlloc = "Attempt to allocate a node ID for an empty node IP address" +) + +// The list is addopted from cilium/cilium/test/helper/utils.go +var errorMsgsWithExceptions = map[string][]string{ + panicMessage: nil, + deadLockHeader: nil, + segmentationFault: nil, + NACKreceived: nil, + RunInitFailed: nil, + sizeMismatch: {"globals/cilium_policy"}, + emptyBPFInitArg: nil, + RemovingMapMsg: nil, + logBufferMessage: nil, + ClangErrorsMsg: nil, + ClangErrorMsg: nil, + symbolSubstitution: nil, + uninitializedRegen: nil, + unstableStat: nil, + removeTransientRule: nil, + missingIptablesWait: nil, + localIDRestoreFail: nil, + routerIPMismatch: nil, + emptyIPNodeIDAlloc: nil, + "DATA RACE": nil, + // Exceptions for level=error should only be added as a last resort, if the + // error cannot be fixed in Cilium or in the test. + "level=error": {"Error in delegate stream, restarting"}, +}