Skip to content

Commit

Permalink
connectivity: Check for Cilium agent errors
Browse files Browse the repository at this point in the history
This commit adds a test case to check for errors in cilium-agent logs.
The list of errors and helper utils were adopted from
cilium/cilium/test/helper/utils.go. The error list diff from the former:

* Removed all exceptions for "level=error" entries.
* Added "Error in delegate stream, restarting" exception to
  "level=error". It's red-herring, and it will be chaned to warn or info
  by the SM team.

Signed-off-by: Martynas Pumputis <[email protected]>
  • Loading branch information
brb committed Jun 20, 2023
1 parent d773153 commit 171bed0
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 2 deletions.
9 changes: 7 additions & 2 deletions connectivity/check/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"io"
"math"
"net"
"reflect"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -204,8 +205,12 @@ func (a *Action) Run(f func(*Action)) {
// TODO(timo): printFlows is a misnomer, this function actually prints
// the verdict annotated over the list of flows.
if a.test.ctx.PrintFlows() || a.failed {
a.printFlows(a.Source())
a.printFlows(a.Destination())
if a.Source() != nil && !reflect.ValueOf(a.Source()).IsNil() {
a.printFlows(a.Source())
}
if a.Destination() != nil && !reflect.ValueOf(a.Destination()).IsNil() {
a.printFlows(a.Destination())
}
}
if a.failed {
if a.test.ctx.params.PauseOnFail {
Expand Down
3 changes: 3 additions & 0 deletions connectivity/check/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const (
IPFamilyAny IPFamily = iota
IPFamilyV4
IPFamilyV6
IPFamilyNone
)

func (f IPFamily) String() string {
Expand All @@ -23,6 +24,8 @@ func (f IPFamily) String() string {
return "ipv4"
case IPFamilyV6:
return "ipv6"
case IPFamilyNone:
return ""
}
return "undefined"
}
Expand Down
4 changes: 4 additions & 0 deletions connectivity/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -1063,5 +1063,9 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch
return err
}

if ct.Params().IncludeUnsafeTests {
ct.NewTest("check-log-errors").WithScenarios(tests.NoErrorsInLogs())
}

return ct.Run(ctx)
}
123 changes: 123 additions & 0 deletions connectivity/tests/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package tests

import (
"context"
"strings"
"time"

"github.com/cilium/cilium-cli/connectivity/check"
)

// NoErrorsInLogs checks whether there are no error messages in cilium-agent
// logs. The error messages are defined in badLogMsgsWithExceptions, which key
// is an error message, while values is a list of ignored messages.
func NoErrorsInLogs() check.Scenario {
return &noErrorsInLogs{}
}

type noErrorsInLogs struct{}

func (n *noErrorsInLogs) Name() string {
return "no-errors-in-logs"
}

func (n *noErrorsInLogs) Run(ctx context.Context, t *check.Test) {
var since time.Time
ct := t.Context()

for _, pod := range ct.CiliumPods() {
pod := pod
logs, err := pod.K8sClient.CiliumLogs(ctx, pod.Pod.Namespace, pod.Pod.Name, since, nil)
if err != nil {
t.Fatalf("Error reading Cilium logs: %s", err)
}
t.NewAction(n, pod.Pod.Name, nil, nil, check.IPFamilyNone).Run(func(a *check.Action) {
checkErrorsInLogs(logs, a)
})
}

}

func checkErrorsInLogs(logs string, a *check.Action) {
uniqueFailures := make(map[string]int)
for _, msg := range strings.Split(logs, "\n") {
for fail, ignoreMsgs := range errorMsgsWithExceptions {
if strings.Contains(msg, fail) {
ok := false
for _, ignore := range ignoreMsgs {
if strings.Contains(msg, ignore) {
ok = true
break
}
}
if !ok {
count, _ := uniqueFailures[msg]
uniqueFailures[msg] = count + 1
}
}
}
}
if len(uniqueFailures) > 0 {
failures := make([]string, 0, len(uniqueFailures))
for f, c := range uniqueFailures {
failures = append(failures, f)

a.Logf("Found %q in logs %d times\n", f, c)
}
failureMsgs := strings.Join(failures, "\n")
a.Failf("Found %d logs matching list of errors that must be investigated:\n%s", len(uniqueFailures), failureMsgs)
}
}

const (
// Logs messages that should not be in the cilium logs
panicMessage = "panic:"
deadLockHeader = "POTENTIAL DEADLOCK:" // from github.com/sasha-s/go-deadlock/deadlock.go:header
segmentationFault = "segmentation fault" // from https://github.com/cilium/cilium/issues/3233
NACKreceived = "NACK received for version" // from https://github.com/cilium/cilium/issues/4003
RunInitFailed = "JoinEP: " // from https://github.com/cilium/cilium/pull/5052
sizeMismatch = "size mismatch for BPF map" // from https://github.com/cilium/cilium/issues/7851
emptyBPFInitArg = "empty argument passed to bpf/init.sh" // from https://github.com/cilium/cilium/issues/10228
RemovingMapMsg = "Removing map to allow for property upgrade" // from https://github.com/cilium/cilium/pull/10626
logBufferMessage = "Log buffer too small to dump verifier log" // from https://github.com/cilium/cilium/issues/10517
ClangErrorsMsg = " errors generated." // from https://github.com/cilium/cilium/issues/10857
ClangErrorMsg = "1 error generated." // from https://github.com/cilium/cilium/issues/10857
symbolSubstitution = "Skipping symbol substitution" //
uninitializedRegen = "Uninitialized regeneration level" // from https://github.com/cilium/cilium/pull/10949
unstableStat = "BUG: stat() has unstable behavior" // from https://github.com/cilium/cilium/pull/11028
removeTransientRule = "Unable to process chain CILIUM_TRANSIENT_FORWARD with ip" // from https://github.com/cilium/cilium/issues/11276
missingIptablesWait = "Missing iptables wait arg (-w):"
localIDRestoreFail = "Could not restore all CIDR identities" // from https://github.com/cilium/cilium/pull/19556
routerIPMismatch = "Mismatch of router IPs found during restoration"
emptyIPNodeIDAlloc = "Attempt to allocate a node ID for an empty node IP address"
)

// The list is addopted from cilium/cilium/test/helper/utils.go
var errorMsgsWithExceptions = map[string][]string{
panicMessage: nil,
deadLockHeader: nil,
segmentationFault: nil,
NACKreceived: nil,
RunInitFailed: nil,
sizeMismatch: {"globals/cilium_policy"},
emptyBPFInitArg: nil,
RemovingMapMsg: nil,
logBufferMessage: nil,
ClangErrorsMsg: nil,
ClangErrorMsg: nil,
symbolSubstitution: nil,
uninitializedRegen: nil,
unstableStat: nil,
removeTransientRule: nil,
missingIptablesWait: nil,
localIDRestoreFail: nil,
routerIPMismatch: nil,
emptyIPNodeIDAlloc: nil,
"DATA RACE": nil,
// Exceptions for level=error should only be added as a last resort, if the
// error cannot be fixed in Cilium or in the test.
"level=error": {"Error in delegate stream, restarting"},
}

0 comments on commit 171bed0

Please sign in to comment.