Skip to content

Commit

Permalink
apm: fix deadlock in breakdown metrics calculation
Browse files Browse the repository at this point in the history
Fix a deadlock that can occur when concurrently ending
a parent and child span, due to the parent waiting for
the child to release the transaction lock, and the child
waiting to lock the parent. Locks are now taken in a
consistent order.
  • Loading branch information
axw committed Aug 5, 2021
1 parent b7aa8dc commit c0a1786
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ endif::[]
https://github.com/elastic/apm-agent-go/compare/v1.13.0...master[View commits]
- Fix concurrency bugs in breakdown metrics and module/apmhttp.WithClientTrace {pull}997[#(997)]
[[release-notes-1.x]]
=== Go Agent version 1.x
Expand Down
17 changes: 13 additions & 4 deletions span.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ func (tx *Transaction) StartSpanOptions(name, spanType string, opts SpanOptions)
}
transactionID := tx.traceContext.Span

// Lock the parent first to avoid deadlocks in breakdown metrics calculation.
if opts.parent != nil {
opts.parent.mu.Lock()
defer opts.parent.mu.Unlock()
}

// Prevent tx from being ended while we're starting a span.
tx.mu.RLock()
defer tx.mu.RUnlock()
Expand Down Expand Up @@ -117,8 +123,6 @@ func (tx *Transaction) StartSpanOptions(name, spanType string, opts SpanOptions)

if tx.breakdownMetricsEnabled {
if span.parent != nil {
span.parent.mu.Lock()
defer span.parent.mu.Unlock()
if !span.parent.ended() {
span.parent.childrenTimer.childStarted(span.timestamp)
}
Expand Down Expand Up @@ -334,6 +338,13 @@ func (s *Span) ParentID() SpanID {
func (s *Span) reportSelfTime() {
endTime := s.timestamp.Add(s.Duration)

// If this span has a parent span, lock it before proceeding to
// prevent deadlocking when concurrently ending parent and child.
if s.parent != nil {
s.parent.mu.Lock()
defer s.parent.mu.Unlock()
}

// TODO(axw) try to find a way to not lock the transaction when
// ending every span. We already lock them when starting spans.
s.tx.mu.RLock()
Expand All @@ -345,11 +356,9 @@ func (s *Span) reportSelfTime() {
s.tx.TransactionData.mu.Lock()
defer s.tx.TransactionData.mu.Unlock()
if s.parent != nil {
s.parent.mu.Lock()
if !s.parent.ended() {
s.parent.childrenTimer.childEnded(endTime)
}
s.parent.mu.Unlock()
} else {
s.tx.childrenTimer.childEnded(endTime)
}
Expand Down

0 comments on commit c0a1786

Please sign in to comment.