Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fallback to proc when ebpf timestamps are wrong #2336

Merged
merged 3 commits into from
Mar 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions probe/endpoint/connection_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,18 @@ func (t *connectionTracker) ReportConnections(rpt *report.Report) {
hostNodeID := report.MakeHostNodeID(t.conf.HostID)

if t.ebpfTracker != nil {
t.performEbpfTrack(rpt, hostNodeID)
return
if !t.ebpfTracker.isDead() {
t.performEbpfTrack(rpt, hostNodeID)
return
}
log.Warnf("ebpf tracker died, gently falling back to proc scanning")
if t.conf.WalkProc && t.conf.Scanner == nil {
t.conf.Scanner = procspy.NewConnectionScanner(t.conf.ProcessCache)
}
if t.flowWalker == nil {
t.flowWalker = newConntrackFlowWalker(t.conf.UseConntrack, t.conf.ProcRoot, t.conf.BufferSize, "--any-nat")
}
t.ebpfTracker = nil
}

// seenTuples contains information about connections seen by conntrack and it will be passed to the /proc parser
Expand Down
13 changes: 12 additions & 1 deletion probe/endpoint/ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type eventTracker interface {
walkConnections(f func(ebpfConnection))
feedInitialConnections(ci procspy.ConnIter, seenTuples map[string]fourTuple, hostNodeID string)
isReadyToHandleConnections() bool
isDead() bool
stop()
}

Expand Down Expand Up @@ -99,7 +100,13 @@ var lastTimestampV4 uint64

func tcpEventCbV4(e tracer.TcpV4) {
if lastTimestampV4 > e.Timestamp {
log.Errorf("ERROR: late event!\n")
// A kernel bug can cause the timestamps to be wrong (e.g. on Ubuntu with Linux 4.4.0-47.68)
// Upgrading the kernel will fix the problem. For further info see:
// https://github.com/iovisor/bcc/issues/790#issuecomment-263704235
// https://github.com/weaveworks/scope/issues/2334
log.Errorf("tcp tracer received event with timestamp %v even though the last timestamp was %v. Stopping the eBPF tracker.", e.Timestamp, lastTimestampV4)
ebpfTracker.dead = true
ebpfTracker.stop()
}

lastTimestampV4 = e.Timestamp
Expand Down Expand Up @@ -197,6 +204,10 @@ func (t *EbpfTracker) isReadyToHandleConnections() bool {
return t.readyToHandleConnections
}

// isDead reports whether the tracker has been flagged as dead, i.e. it returns
// the current value of the dead field. In this change the flag is set by
// tcpEventCbV4 when it sees an event whose timestamp is older than the
// previous one, and ReportConnections checks it to fall back to proc scanning.
// NOTE(review): the field is read here without any locking — confirm whether
// the event callback and the callers of isDead run on different goroutines.
func (t *EbpfTracker) isDead() bool {
return t.dead
}

func (t *EbpfTracker) stop() {
// TODO: implement proper stopping logic
//
Expand Down
58 changes: 57 additions & 1 deletion probe/endpoint/ebpf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,62 @@ func TestWalkConnections(t *testing.T) {
cnt++
})
if cnt != 2 {
t.Errorf("walkConnetions found %v instead of 2 connections", cnt)
t.Errorf("walkConnections found %v instead of 2 connections", cnt)
}
}

// TestInvalidTimeStampDead checks that tcpEventCbV4 keeps the tracker alive
// while event timestamps do not go backwards, and flags it as dead as soon as
// an event arrives with a timestamp older than the previously seen one.
//
// NOTE(review): tcpEventCbV4 compares against the package-level
// lastTimestampV4, which this test assumes is 0 on entry and leaves non-zero on
// exit — confirm no other test depends on that state.
func TestInvalidTimeStampDead(t *testing.T) {
	const (
		clientPid  uint32 = 43
		serverPort uint16 = 12345
		clientPort uint16 = 6789
		netNS      uint32 = 123456789
	)
	// net.ParseIP yields real IP values; a plain net.IP("127.0.0.1")
	// conversion would only reinterpret the ASCII bytes of the string and
	// produce an invalid 9-byte address.
	var (
		serverIP = net.ParseIP("127.0.0.1")
		clientIP = net.ParseIP("127.0.0.2")
	)
	event := tracer.TcpV4{
		CPU:   0,
		Type:  tracer.EventConnect,
		Pid:   clientPid,
		Comm:  "cmd",
		SAddr: clientIP,
		DAddr: serverIP,
		SPort: clientPort,
		DPort: serverPort,
		NetNS: netNS,
	}

	mockEbpfTracker := &EbpfTracker{
		readyToHandleConnections: true,
		dead:                     false,
		openConnections:          map[string]ebpfConnection{},
	}

	// The callback works on the package-level tracker; put the previous one
	// back afterwards so a dead mock does not leak into other tests.
	previousTracker := ebpfTracker
	defer func() { ebpfTracker = previousTracker }()
	ebpfTracker = mockEbpfTracker

	// countConnections returns how many connections the mock currently tracks.
	countConnections := func() int {
		cnt := 0
		mockEbpfTracker.walkConnections(func(ebpfConnection) {
			cnt++
		})
		return cnt
	}

	// Two events in increasing timestamp order: both must be tracked and the
	// tracker must stay alive.
	event.Timestamp = 0
	tcpEventCbV4(event)
	event2 := event
	event2.SPort = 1
	event2.Timestamp = 2
	tcpEventCbV4(event2)
	if cnt := countConnections(); cnt != 2 {
		t.Errorf("walkConnections found %v instead of 2 connections", cnt)
	}
	if mockEbpfTracker.isDead() {
		t.Errorf("expected ebpfTracker to be alive after events with valid order")
	}

	// An event older than the last one seen (1 < 2) must kill the tracker
	// without changing the number of tracked connections.
	event.Timestamp = 1
	tcpEventCbV4(event)
	if cnt := countConnections(); cnt != 2 {
		t.Errorf("walkConnections found %v instead of 2 connections", cnt)
	}
	if !mockEbpfTracker.isDead() {
		t.Errorf("expected ebpfTracker to be set to dead after events with wrong order")
	}
}
6 changes: 4 additions & 2 deletions probe/endpoint/procspy/reader_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ func (br *backgroundReader) getWalkedProcPid(buf *bytes.Buffer) (map[uint64]*Pro
br.mtx.Lock()
defer br.mtx.Unlock()

var err error
// Don't access latestBuf directly but create a reader. In this way,
// the buffer will not be empty in the next call of getWalkedProcPid
// and it can be copied again.
_, err := io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes()))

if br.latestBuf != nil {
_, err = io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes()))
}
return br.latestSockets, err
}

Expand Down