Skip to content

Commit

Permalink
Reduce newEvictedQueueLink and newEvictedQueueEvent memory alloca…
Browse files Browse the repository at this point in the history
…tions (#5858)

Good day,

While doing some profile with pprof on one of our services I notices
that `sync.OnceFunc` was allocating a nice amount of objects. These
`sync.OnceFunc` calls where done by `newEvictedQueueEvent` and
`newEvictedQueueLink`.

So to avoid these extra allocation I created this PR which replaces the
`sync.OnceFunc` with `sync.Once` which is now part of the evictedQueue.
This resulted in the following benchstat result (commit
baad07e):
```
goos: linux
goarch: amd64
pkg: go.opentelemetry.io/otel/sdk/trace
cpu: 11th Gen Intel(R) Core(TM) i5-11400H @ 2.70GHz
                                              │ 1727937489-attr-old.txt │        evic-new1728018933.txt        │
                                              │         sec/op          │    sec/op     vs base                │
RecordingSpanSetAttributes/WithLimit/false-12              6.579µ ±  9%   5.875µ ±  8%  -10.71% (p=0.000 n=10)
RecordingSpanSetAttributes/WithLimit/true-12               14.27µ ± 10%   11.82µ ± 16%  -17.18% (p=0.009 n=10)
SpanEnd-12                                                 63.44n ±  0%   75.97n ±  2%  +19.75% (p=0.000 n=10)
TraceStart/with_a_simple_span-12                           728.0n ±  3%   412.7n ±  1%  -43.31% (p=0.000 n=10)
TraceStart/with_several_links-12                           881.5n ±  1%   553.2n ±  3%  -37.25% (p=0.000 n=10)
TraceStart/with_attributes-12                              916.5n ±  2%   598.4n ±  2%  -34.70% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_10,_spans:_10-12                 151.9n ±  3%   144.1n ±  3%   -5.13% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_10,_spans:_100-12                1.497µ ±  2%   1.435µ ±  2%   -4.14% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_100,_spans:_10-12                148.5n ±  2%   143.0n ±  2%   -3.67% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_100,_spans:_100-12               1.481µ ±  3%   1.442µ ±  4%   -2.67% (p=0.022 n=10)
SpanProcessorVerboseLogging-12                             9.376µ ±  1%   6.662µ ±  2%  -28.94% (p=0.000 n=10)
SpanLimits/AttributeValueLengthLimit-12                    7.221µ ±  4%   6.605µ ±  3%   -8.54% (p=0.002 n=10)
SpanLimits/AttributeCountLimit-12                          6.300µ ±  6%   6.091µ ±  6%   -3.30% (p=0.007 n=10)
SpanLimits/EventCountLimit-12                              5.921µ ±  4%   5.506µ ±  7%   -7.02% (p=0.011 n=10)
SpanLimits/LinkCountLimit-12                               6.073µ ±  6%   5.423µ ±  3%  -10.71% (p=0.000 n=10)
SpanLimits/AttributePerEventCountLimit-12                  6.455µ ±  3%   5.294µ ±  9%  -17.98% (p=0.000 n=10)
SpanLimits/AttributePerLinkCountLimit-12                   6.263µ ±  4%   5.850µ ±  7%   -6.59% (p=0.000 n=10)
SpanSetAttributesOverCapacity-12                           1.683µ ±  1%   1.391µ ±  0%  -17.35% (p=0.000 n=10)
StartEndSpan/AlwaysSample-12                               803.0n ±  4%   531.4n ±  5%  -33.82% (p=0.000 n=10)
StartEndSpan/NeverSample-12                                216.0n ±  2%   212.7n ±  2%        ~ (p=0.055 n=10)
SpanWithAttributes_4/AlwaysSample-12                      1216.5n ±  2%   877.6n ±  9%  -27.85% (p=0.000 n=10)
SpanWithAttributes_4/NeverSample-12                        370.4n ±  5%   374.5n ±  3%        ~ (p=0.197 n=10)
SpanWithAttributes_8/AlwaysSample-12                       1.494µ ±  4%   1.117µ ±  4%  -25.27% (p=0.000 n=10)
SpanWithAttributes_8/NeverSample-12                        477.1n ±  3%   475.1n ±  7%        ~ (p=0.739 n=10)
SpanWithAttributes_all/AlwaysSample-12                    1310.5n ±  5%   971.5n ±  6%  -25.87% (p=0.000 n=10)
SpanWithAttributes_all/NeverSample-12                      388.0n ±  5%   389.1n ±  6%        ~ (p=0.699 n=10)
SpanWithAttributes_all_2x/AlwaysSample-12                  1.664µ ±  2%   1.236µ ±  3%  -25.72% (p=0.000 n=10)
SpanWithAttributes_all_2x/NeverSample-12                   522.7n ±  7%   527.2n ±  9%        ~ (p=0.912 n=10)
SpanWithEvents_4/AlwaysSample-12                           1.411µ ±  4%   1.058µ ±  2%  -24.99% (p=0.000 n=10)
SpanWithEvents_4/NeverSample-12                            218.4n ±  3%   218.9n ±  2%        ~ (p=0.971 n=10)
SpanWithEvents_8/AlwaysSample-12                           1.997µ ±  5%   1.657µ ±  4%  -17.03% (p=0.000 n=10)
SpanWithEvents_8/NeverSample-12                            226.6n ±  3%   221.7n ±  2%   -2.14% (p=0.015 n=10)
SpanWithEvents_WithStackTrace/AlwaysSample-12             1016.5n ±  2%   712.5n ±  2%  -29.91% (p=0.000 n=10)
SpanWithEvents_WithStackTrace/NeverSample-12               242.0n ±  3%   241.0n ±  1%        ~ (p=0.254 n=10)
SpanWithEvents_WithTimestamp/AlwaysSample-12              1019.5n ±  3%   713.2n ±  6%  -30.04% (p=0.000 n=10)
SpanWithEvents_WithTimestamp/NeverSample-12                276.4n ±  3%   276.6n ±  2%        ~ (p=0.723 n=10)
TraceID_DotString-12                                       71.94n ±  3%   75.71n ±  3%   +5.23% (p=0.001 n=10)
SpanID_DotString-12                                        50.88n ±  3%   52.35n ±  1%   +2.88% (p=0.001 n=10)
geomean                                                    952.8n         822.2n        -13.71%

                                              │ 1727937489-attr-old.txt │         evic-new1728018933.txt         │
                                              │          B/op           │     B/op      vs base                  │
RecordingSpanSetAttributes/WithLimit/false-12            7.062Ki ± 0%     6.891Ki ± 0%   -2.43% (p=0.000 n=10)
RecordingSpanSetAttributes/WithLimit/true-12             7.564Ki ± 0%     7.393Ki ± 0%   -2.27% (p=0.000 n=10)
SpanEnd-12                                                 0.000 ± 0%       0.000 ± 0%        ~ (p=1.000 n=10) ¹
TraceStart/with_a_simple_span-12                           704.0 ± 0%       528.0 ± 0%  -25.00% (p=0.000 n=10)
TraceStart/with_several_links-12                           880.0 ± 0%       704.0 ± 0%  -20.00% (p=0.000 n=10)
TraceStart/with_attributes-12                              960.0 ± 0%       784.0 ± 0%  -18.33% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_10,_spans:_10-12                 0.000 ± 0%       0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_10,_spans:_100-12                0.000 ± 0%       0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_100,_spans:_10-12                0.000 ± 0%       0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_100,_spans:_100-12               0.000 ± 0%       0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorVerboseLogging-12                          10.938Ki ± 0%     9.219Ki ± 0%  -15.71% (p=0.000 n=10)
SpanLimits/AttributeValueLengthLimit-12                  10.76Ki ± 0%     10.59Ki ± 0%   -1.60% (p=0.000 n=10)
SpanLimits/AttributeCountLimit-12                       10.016Ki ± 0%     9.844Ki ± 0%   -1.72% (p=0.000 n=10)
SpanLimits/EventCountLimit-12                            9.594Ki ± 0%     9.422Ki ± 0%   -1.79% (p=0.000 n=10)
SpanLimits/LinkCountLimit-12                             9.203Ki ± 0%     9.031Ki ± 0%   -1.87% (p=0.000 n=10)
SpanLimits/AttributePerEventCountLimit-12                10.64Ki ± 0%     10.47Ki ± 0%   -1.62% (p=0.000 n=10)
SpanLimits/AttributePerLinkCountLimit-12                 10.64Ki ± 0%     10.47Ki ± 0%   -1.62% (p=0.000 n=10)
SpanSetAttributesOverCapacity-12                           768.0 ± 0%       592.0 ± 0%  -22.92% (p=0.000 n=10)
StartEndSpan/AlwaysSample-12                               704.0 ± 0%       528.0 ± 0%  -25.00% (p=0.000 n=10)
StartEndSpan/NeverSample-12                                144.0 ± 0%       144.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_4/AlwaysSample-12                     1.188Ki ± 0%     1.016Ki ± 0%  -14.47% (p=0.000 n=10)
SpanWithAttributes_4/NeverSample-12                        400.0 ± 0%       400.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_8/AlwaysSample-12                     1.688Ki ± 0%     1.516Ki ± 0%  -10.19% (p=0.000 n=10)
SpanWithAttributes_8/NeverSample-12                        656.0 ± 0%       656.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_all/AlwaysSample-12                   1.312Ki ± 0%     1.141Ki ± 0%  -13.10% (p=0.000 n=10)
SpanWithAttributes_all/NeverSample-12                      464.0 ± 0%       464.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_all_2x/AlwaysSample-12                2.062Ki ± 0%     1.891Ki ± 0%   -8.33% (p=0.000 n=10)
SpanWithAttributes_all_2x/NeverSample-12                   848.0 ± 0%       848.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_4/AlwaysSample-12                         1.188Ki ± 0%     1.016Ki ± 0%  -14.47% (p=0.000 n=10)
SpanWithEvents_4/NeverSample-12                            144.0 ± 0%       144.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_8/AlwaysSample-12                         1.812Ki ± 0%     1.641Ki ± 0%   -9.48% (p=0.000 n=10)
SpanWithEvents_8/NeverSample-12                            144.0 ± 0%       144.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_WithStackTrace/AlwaysSample-12              800.0 ± 0%       624.0 ± 0%  -22.00% (p=0.000 n=10)
SpanWithEvents_WithStackTrace/NeverSample-12               160.0 ± 0%       160.0 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_WithTimestamp/AlwaysSample-12               824.0 ± 0%       648.0 ± 0%  -21.36% (p=0.000 n=10)
SpanWithEvents_WithTimestamp/NeverSample-12                184.0 ± 0%       184.0 ± 0%        ~ (p=1.000 n=10) ¹
TraceID_DotString-12                                       64.00 ± 0%       64.00 ± 0%        ~ (p=1.000 n=10) ¹
SpanID_DotString-12                                        32.00 ± 0%       32.00 ± 0%        ~ (p=1.000 n=10) ¹
geomean                                                               ²                  -7.15%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                                              │ 1727937489-attr-old.txt │        evic-new1728018933.txt        │
                                              │        allocs/op        │ allocs/op   vs base                  │
RecordingSpanSetAttributes/WithLimit/false-12             15.000 ± 0%     3.000 ± 0%  -80.00% (p=0.000 n=10)
RecordingSpanSetAttributes/WithLimit/true-12              20.000 ± 0%     8.000 ± 0%  -60.00% (p=0.000 n=10)
SpanEnd-12                                                 0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
TraceStart/with_a_simple_span-12                          14.000 ± 0%     2.000 ± 0%  -85.71% (p=0.000 n=10)
TraceStart/with_several_links-12                          15.000 ± 0%     3.000 ± 0%  -80.00% (p=0.000 n=10)
TraceStart/with_attributes-12                             16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanProcessorOnEnd/batch:_10,_spans:_10-12                 0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_10,_spans:_100-12                0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_100,_spans:_10-12                0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorOnEnd/batch:_100,_spans:_100-12               0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanProcessorVerboseLogging-12                            155.00 ± 0%     35.00 ± 0%  -77.42% (p=0.000 n=10)
SpanLimits/AttributeValueLengthLimit-12                    54.00 ± 0%     42.00 ± 0%  -22.22% (p=0.000 n=10)
SpanLimits/AttributeCountLimit-12                          50.00 ± 0%     38.00 ± 0%  -24.00% (p=0.000 n=10)
SpanLimits/EventCountLimit-12                              47.00 ± 0%     35.00 ± 0%  -25.53% (p=0.000 n=10)
SpanLimits/LinkCountLimit-12                               47.00 ± 0%     35.00 ± 0%  -25.53% (p=0.000 n=10)
SpanLimits/AttributePerEventCountLimit-12                  50.00 ± 0%     38.00 ± 0%  -24.00% (p=0.000 n=10)
SpanLimits/AttributePerLinkCountLimit-12                   50.00 ± 0%     38.00 ± 0%  -24.00% (p=0.000 n=10)
SpanSetAttributesOverCapacity-12                          15.000 ± 0%     3.000 ± 0%  -80.00% (p=0.000 n=10)
StartEndSpan/AlwaysSample-12                              14.000 ± 0%     2.000 ± 0%  -85.71% (p=0.000 n=10)
StartEndSpan/NeverSample-12                                2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_4/AlwaysSample-12                      16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanWithAttributes_4/NeverSample-12                        3.000 ± 0%     3.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_8/AlwaysSample-12                      16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanWithAttributes_8/NeverSample-12                        3.000 ± 0%     3.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_all/AlwaysSample-12                    16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanWithAttributes_all/NeverSample-12                      3.000 ± 0%     3.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithAttributes_all_2x/AlwaysSample-12                 16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanWithAttributes_all_2x/NeverSample-12                   3.000 ± 0%     3.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_4/AlwaysSample-12                          17.000 ± 0%     5.000 ± 0%  -70.59% (p=0.000 n=10)
SpanWithEvents_4/NeverSample-12                            2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_8/AlwaysSample-12                          18.000 ± 0%     6.000 ± 0%  -66.67% (p=0.000 n=10)
SpanWithEvents_8/NeverSample-12                            2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_WithStackTrace/AlwaysSample-12             16.000 ± 0%     4.000 ± 0%  -75.00% (p=0.000 n=10)
SpanWithEvents_WithStackTrace/NeverSample-12               3.000 ± 0%     3.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanWithEvents_WithTimestamp/AlwaysSample-12              17.000 ± 0%     5.000 ± 0%  -70.59% (p=0.000 n=10)
SpanWithEvents_WithTimestamp/NeverSample-12                4.000 ± 0%     4.000 ± 0%        ~ (p=1.000 n=10) ¹
TraceID_DotString-12                                       2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
SpanID_DotString-12                                        2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
geomean                                                               ²               -47.73%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean
```

---------

Co-authored-by: Damien Mathieu <[email protected]>
Co-authored-by: Robert Pająk <[email protected]>
  • Loading branch information
3 people authored Oct 8, 2024
1 parent 4a911f9 commit 8fbaa97
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- `FilterProcessor.Enabled` in `go.opentelemetry.io/otel/sdk/log/internal/x` now accepts `EnabledParameters` instead of `Record`. (#5791)
- The `Record` type in `go.opentelemetry.io/otel/log` is no longer comparable. (#5847)
- Performance improvements for the trace SDK `SetAttributes` method in `Span`. (#5864)
- Reduce memory allocations for the `Event` and `Link` lists in `Span`. (#5858)

### Deprecated

Expand Down
21 changes: 13 additions & 8 deletions sdk/trace/evictedqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,26 @@ import (

// evictedQueue is a FIFO queue with a configurable capacity.
type evictedQueue[T any] struct {
queue []T
capacity int
droppedCount int
logDropped func()
queue []T
capacity int
droppedCount int
logDroppedMsg string
logDroppedOnce sync.Once
}

func newEvictedQueueEvent(capacity int) evictedQueue[Event] {
// Do not pre-allocate queue, do this lazily.
return evictedQueue[Event]{
capacity: capacity,
logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }),
capacity: capacity,
logDroppedMsg: "limit reached: dropping trace trace.Event",
}
}

func newEvictedQueueLink(capacity int) evictedQueue[Link] {
// Do not pre-allocate queue, do this lazily.
return evictedQueue[Link]{
capacity: capacity,
logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }),
capacity: capacity,
logDroppedMsg: "limit reached: dropping trace trace.Link",
}
}

Expand All @@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) {
eq.queue = append(eq.queue, value)
}

func (eq *evictedQueue[T]) logDropped() {
eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) })
}

// copy returns a copy of the evictedQueue.
func (eq *evictedQueue[T]) copy() []T {
return slices.Clone(eq.queue)
Expand Down
23 changes: 17 additions & 6 deletions sdk/trace/evictedqueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ import (
"reflect"
"testing"

"github.com/go-logr/logr"
"github.com/go-logr/logr/funcr"
"github.com/stretchr/testify/assert"

"go.opentelemetry.io/otel/internal/global"
)

func init() {
Expand Down Expand Up @@ -36,18 +40,25 @@ func TestCopy(t *testing.T) {

func TestDropCount(t *testing.T) {
q := newEvictedQueueEvent(3)
var called bool
q.logDropped = func() { called = true }

var called int
t.Cleanup(func(l logr.Logger) func() {
return func() { global.SetLogger(l) }
}(global.GetLogger()))
global.SetLogger(funcr.New(func(prefix, args string) {
called++
}, funcr.Options{Verbosity: 1}))

q.add(Event{Name: "value1"})
assert.False(t, called, `"value1" logged as dropped`)
assert.Equal(t, 0, called, `"value1" logged as dropped`)
q.add(Event{Name: "value2"})
assert.False(t, called, `"value2" logged as dropped`)
assert.Equal(t, 0, called, `"value2" logged as dropped`)
q.add(Event{Name: "value3"})
assert.False(t, called, `"value3" logged as dropped`)
assert.Equal(t, 0, called, `"value3" logged as dropped`)
q.add(Event{Name: "value1"})
assert.True(t, called, `"value2" not logged as dropped`)
assert.Equal(t, 1, called, `"value2" not logged as dropped`)
q.add(Event{Name: "value4"})
assert.Equal(t, 1, called, `"value4" logged as dropped`)
if wantLen, gotLen := 3, len(q.queue); wantLen != gotLen {
t.Errorf("got queue length %d want %d", gotLen, wantLen)
}
Expand Down

0 comments on commit 8fbaa97

Please sign in to comment.