-
Notifications
You must be signed in to change notification settings - Fork 208
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
connectivity: Check for Cilium agent errors
This commit adds a test case to check for errors in cilium-agent logs. The list of errors and helper utils were adopted from cilium/cilium/test/helper/utils.go. The error list diff from the former: * Removed all exceptions for "level=error" entries. * Added "Error in delegate stream, restarting" exception to "level=error". It's red-herring, and it will be chaned to warn or info by the SM team. Signed-off-by: Martynas Pumputis <[email protected]>
- Loading branch information
Showing
4 changed files
with
137 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// Copyright Authors of Cilium | ||
|
||
package tests | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
"time" | ||
|
||
"github.com/cilium/cilium-cli/connectivity/check" | ||
) | ||
|
||
// NoErrorsInLogs checks whether there are no error messages in cilium-agent | ||
// logs. The error messages are defined in badLogMsgsWithExceptions, which key | ||
// is an error message, while values is a list of ignored messages. | ||
func NoErrorsInLogs() check.Scenario { | ||
return &noErrorsInLogs{} | ||
} | ||
|
||
type noErrorsInLogs struct{} | ||
|
||
func (n *noErrorsInLogs) Name() string { | ||
return "no-errors-in-logs" | ||
} | ||
|
||
func (n *noErrorsInLogs) Run(ctx context.Context, t *check.Test) { | ||
var since time.Time | ||
ct := t.Context() | ||
|
||
for _, pod := range ct.CiliumPods() { | ||
pod := pod | ||
logs, err := pod.K8sClient.CiliumLogs(ctx, pod.Pod.Namespace, pod.Pod.Name, since, nil) | ||
if err != nil { | ||
t.Fatalf("Error reading Cilium logs: %s", err) | ||
} | ||
t.NewAction(n, pod.Pod.Name, nil, nil, check.IPFamilyNone).Run(func(a *check.Action) { | ||
checkErrorsInLogs(logs, a) | ||
}) | ||
} | ||
|
||
} | ||
|
||
func checkErrorsInLogs(logs string, a *check.Action) { | ||
uniqueFailures := make(map[string]int) | ||
for _, msg := range strings.Split(logs, "\n") { | ||
for fail, ignoreMsgs := range errorMsgsWithExceptions { | ||
if strings.Contains(msg, fail) { | ||
ok := false | ||
for _, ignore := range ignoreMsgs { | ||
if strings.Contains(msg, ignore) { | ||
ok = true | ||
break | ||
} | ||
} | ||
if !ok { | ||
count, _ := uniqueFailures[msg] | ||
uniqueFailures[msg] = count + 1 | ||
} | ||
} | ||
} | ||
} | ||
if len(uniqueFailures) > 0 { | ||
failures := make([]string, 0, len(uniqueFailures)) | ||
for f, c := range uniqueFailures { | ||
failures = append(failures, f) | ||
|
||
a.Logf("Found %q in logs %d times\n", f, c) | ||
} | ||
failureMsgs := strings.Join(failures, "\n") | ||
a.Failf("Found %d logs matching list of errors that must be investigated:\n%s", len(uniqueFailures), failureMsgs) | ||
} | ||
} | ||
|
||
const ( | ||
// Logs messages that should not be in the cilium logs | ||
panicMessage = "panic:" | ||
deadLockHeader = "POTENTIAL DEADLOCK:" // from github.com/sasha-s/go-deadlock/deadlock.go:header | ||
segmentationFault = "segmentation fault" // from https://github.com/cilium/cilium/issues/3233 | ||
NACKreceived = "NACK received for version" // from https://github.com/cilium/cilium/issues/4003 | ||
RunInitFailed = "JoinEP: " // from https://github.com/cilium/cilium/pull/5052 | ||
sizeMismatch = "size mismatch for BPF map" // from https://github.com/cilium/cilium/issues/7851 | ||
emptyBPFInitArg = "empty argument passed to bpf/init.sh" // from https://github.com/cilium/cilium/issues/10228 | ||
RemovingMapMsg = "Removing map to allow for property upgrade" // from https://github.com/cilium/cilium/pull/10626 | ||
logBufferMessage = "Log buffer too small to dump verifier log" // from https://github.com/cilium/cilium/issues/10517 | ||
ClangErrorsMsg = " errors generated." // from https://github.com/cilium/cilium/issues/10857 | ||
ClangErrorMsg = "1 error generated." // from https://github.com/cilium/cilium/issues/10857 | ||
symbolSubstitution = "Skipping symbol substitution" // | ||
uninitializedRegen = "Uninitialized regeneration level" // from https://github.com/cilium/cilium/pull/10949 | ||
unstableStat = "BUG: stat() has unstable behavior" // from https://github.com/cilium/cilium/pull/11028 | ||
removeTransientRule = "Unable to process chain CILIUM_TRANSIENT_FORWARD with ip" // from https://github.com/cilium/cilium/issues/11276 | ||
missingIptablesWait = "Missing iptables wait arg (-w):" | ||
localIDRestoreFail = "Could not restore all CIDR identities" // from https://github.com/cilium/cilium/pull/19556 | ||
routerIPMismatch = "Mismatch of router IPs found during restoration" | ||
emptyIPNodeIDAlloc = "Attempt to allocate a node ID for an empty node IP address" | ||
) | ||
|
||
// The list is addopted from cilium/cilium/test/helper/utils.go | ||
var errorMsgsWithExceptions = map[string][]string{ | ||
panicMessage: nil, | ||
deadLockHeader: nil, | ||
segmentationFault: nil, | ||
NACKreceived: nil, | ||
RunInitFailed: nil, | ||
sizeMismatch: {"globals/cilium_policy"}, | ||
emptyBPFInitArg: nil, | ||
RemovingMapMsg: nil, | ||
logBufferMessage: nil, | ||
ClangErrorsMsg: nil, | ||
ClangErrorMsg: nil, | ||
symbolSubstitution: nil, | ||
uninitializedRegen: nil, | ||
unstableStat: nil, | ||
removeTransientRule: nil, | ||
missingIptablesWait: nil, | ||
localIDRestoreFail: nil, | ||
routerIPMismatch: nil, | ||
emptyIPNodeIDAlloc: nil, | ||
"DATA RACE": nil, | ||
// Exceptions for level=error should only be added as a last resort, if the | ||
// error cannot be fixed in Cilium or in the test. | ||
"level=error": {"Error in delegate stream, restarting"}, | ||
} |