From a19073aebcddd3e1b0459039ee92be72b00deac5 Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Sun, 24 Nov 2024 08:58:27 -0800 Subject: [PATCH] Add attribute value limits Include tests for span attribute limits. --- CHANGELOG.md | 1 + sdk/span.go | 82 +++++++++++++++++- sdk/span_test.go | 213 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b216387d..ce56b8cf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ OpenTelemetry Go Automatic Instrumentation adheres to [Semantic Versioning](http ### Added - Add support for span attribute limits to `go.opentelemtry.io/auto/sdk`. ([#1315](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1315)) +- Add support for attribute value limits to `go.opentelemtry.io/auto/sdk`. ([#1325](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1325)) ## [v0.18.0-alpha] - 2024-11-20 diff --git a/sdk/span.go b/sdk/span.go index e81a13014..90d6e6373 100644 --- a/sdk/span.go +++ b/sdk/span.go @@ -8,9 +8,11 @@ import ( "fmt" "reflect" "runtime" + "strings" "sync" "sync/atomic" "time" + "unicode/utf8" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -159,7 +161,8 @@ func convAttrValue(value attribute.Value) telemetry.Value { case attribute.FLOAT64: return telemetry.Float64Value(value.AsFloat64()) case attribute.STRING: - return telemetry.StringValue(value.AsString()) + v := truncate(maxSpan.AttrValueLen, value.AsString()) + return telemetry.StringValue(v) case attribute.BOOLSLICE: slice := value.AsBoolSlice() out := make([]telemetry.Value, 0, len(slice)) @@ -185,6 +188,7 @@ func convAttrValue(value attribute.Value) telemetry.Value { slice := value.AsStringSlice() out := make([]telemetry.Value, 0, len(slice)) for _, v := range slice { + v = truncate(maxSpan.AttrValueLen, v) out = append(out, telemetry.StringValue(v)) } return telemetry.SliceValue(out...) @@ -192,6 +196,82 @@ func convAttrValue(value attribute.Value) telemetry.Value { return telemetry.Value{} } +// truncate returns a truncated version of s such that it contains less than +// the limit number of characters. Truncation is applied by returning the limit +// number of valid characters contained in s. +// +// If limit is negative, it returns the original string. +// +// UTF-8 is supported. When truncating, all invalid characters are dropped +// before applying truncation. +// +// If s already contains less than the limit number of bytes, it is returned +// unchanged. No invalid characters are removed. +func truncate(limit int, s string) string { + // This prioritize performance in the following order based on the most + // common expected use-cases. + // + // - Short values less than the default limit (128). + // - Strings with valid encodings that exceed the limit. + // - No limit. + // - Strings with invalid encodings that exceed the limit. + if limit < 0 || len(s) <= limit { + return s + } + + // Optimistically, assume all valid UTF-8. + var b strings.Builder + count := 0 + for i, c := range s { + if c != utf8.RuneError { + count++ + if count > limit { + return s[:i] + } + continue + } + + _, size := utf8.DecodeRuneInString(s[i:]) + if size == 1 { + // Invalid encoding. + b.Grow(len(s) - 1) + _, _ = b.WriteString(s[:i]) + s = s[i:] + break + } + } + + // Fast-path, no invalid input. + if b.Cap() == 0 { + return s + } + + // Truncate while validating UTF-8. + for i := 0; i < len(s) && count < limit; { + c := s[i] + if c < utf8.RuneSelf { + // Optimization for single byte runes (common case). + _ = b.WriteByte(c) + i++ + count++ + continue + } + + _, size := utf8.DecodeRuneInString(s[i:]) + if size == 1 { + // We checked for all 1-byte runes above, this is a RuneError. + i++ + continue + } + + _, _ = b.WriteString(s[i : i+size]) + i += size + count++ + } + + return b.String() +} + func (s *span) End(opts ...trace.SpanEndOption) { if s == nil || !s.sampled.Swap(false) { return diff --git a/sdk/span_test.go b/sdk/span_test.go index e855aa4b4..0a077ea25 100644 --- a/sdk/span_test.go +++ b/sdk/span_test.go @@ -494,6 +494,75 @@ func TestSpanAttributeLimits(t *testing.T) { } } +func TestSpanAttributeValueLimits(t *testing.T) { + value := "hello world" + + aStr := attribute.String("string", value) + aStrSlice := attribute.StringSlice("slice", []string{value, value}) + + eq := func(a, b []telemetry.Attr) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if !a[i].Equal(b[i]) { + return false + } + } + return true + } + + tests := []struct { + limit int + want string + }{ + {0, ""}, + {2, value[:2]}, + {11, value}, + {-1, value}, + } + for _, test := range tests { + t.Run("Limit/"+strconv.Itoa(test.limit), func(t *testing.T) { + orig := maxSpan.AttrValueLen + maxSpan.AttrValueLen = test.limit + t.Cleanup(func() { maxSpan.AttrValueLen = orig }) + + builder := spanBuilder{} + + want := []telemetry.Attr{ + telemetry.String("string", test.want), + telemetry.Slice( + "slice", + telemetry.StringValue(test.want), + telemetry.StringValue(test.want), + ), + } + + s := builder.Build() + s.SetAttributes(aStr, aStrSlice) + assert.Truef(t, eq(want, s.span.Attrs), "set span attributes: got %#v, want %#v", s.span.Attrs, want) + + s.AddEvent("test", trace.WithAttributes(aStr, aStrSlice)) + assert.Truef(t, eq(want, s.span.Events[0].Attrs), "span event attributes: got %#v, want %#v", s.span.Events[0].Attrs, want) + + s.AddLink(trace.Link{ + Attributes: []attribute.KeyValue{aStr, aStrSlice}, + }) + assert.Truef(t, eq(want, s.span.Links[0].Attrs), "span link attributes: got %#v, want %#v", s.span.Links[0].Attrs, want) + + builder.Options = []trace.SpanStartOption{ + trace.WithAttributes(aStr, aStrSlice), + trace.WithLinks(trace.Link{ + Attributes: []attribute.KeyValue{aStr, aStrSlice}, + }), + } + s = builder.Build() + assert.Truef(t, eq(want, s.span.Attrs), "new span attributes: got %#v, want %#v", s.span.Attrs, want) + assert.Truef(t, eq(want, s.span.Links[0].Attrs), "new span link attributes: got %#v, want %#v", s.span.Attrs, want) + }) + } +} + func TestSpanTracerProvider(t *testing.T) { var s span @@ -522,6 +591,150 @@ func (b spanBuilder) Build() *span { return s } +func TestTruncate(t *testing.T) { + type group struct { + limit int + input string + expected string + } + + tests := []struct { + name string + groups []group + }{ + // Edge case: limit is negative, no truncation should occur + { + name: "NoTruncation", + groups: []group{ + {-1, "No truncation!", "No truncation!"}, + }, + }, + + // Edge case: string is already shorter than the limit, no truncation + // should occur + { + name: "ShortText", + groups: []group{ + {10, "Short text", "Short text"}, + {15, "Short text", "Short text"}, + {100, "Short text", "Short text"}, + }, + }, + + // Edge case: truncation happens with ASCII characters only + { + name: "ASCIIOnly", + groups: []group{ + {1, "Hello World!", "H"}, + {5, "Hello World!", "Hello"}, + {12, "Hello World!", "Hello World!"}, + }, + }, + + // Truncation including multi-byte characters (UTF-8) + { + name: "ValidUTF-8", + groups: []group{ + {7, "Hello, 世界", "Hello, "}, + {8, "Hello, 世界", "Hello, 世"}, + {2, "こんにちは", "こん"}, + {3, "こんにちは", "こんに"}, + {5, "こんにちは", "こんにちは"}, + {12, "こんにちは", "こんにちは"}, + }, + }, + + // Truncation with invalid UTF-8 characters + { + name: "InvalidUTF-8", + groups: []group{ + {11, "Invalid\x80text", "Invalidtext"}, + // Do not modify invalid text if equal to limit. + {11, "Valid text\x80", "Valid text\x80"}, + // Do not modify invalid text if under limit. + {15, "Valid text\x80", "Valid text\x80"}, + {5, "Hello\x80World", "Hello"}, + {11, "Hello\x80World\x80!", "HelloWorld!"}, + {15, "Hello\x80World\x80Test", "HelloWorldTest"}, + {15, "Hello\x80\x80\x80World\x80Test", "HelloWorldTest"}, + {15, "\x80\x80\x80Hello\x80\x80\x80World\x80Test\x80\x80", "HelloWorldTest"}, + }, + }, + + // Truncation with mixed validn and invalid UTF-8 characters + { + name: "MixedUTF-8", + groups: []group{ + {6, "€"[0:2] + "hello€€", "hello€"}, + {6, "€" + "€"[0:2] + "hello", "€hello"}, + {11, "Valid text\x80📜", "Valid text📜"}, + {11, "Valid text📜\x80", "Valid text📜"}, + {14, "😊 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"}, + {14, "😊\x80 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"}, + {14, "😊\x80 Hello\x80World🌍\x80🚀", "😊 HelloWorld🌍🚀"}, + {14, "😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"}, + {14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"}, + }, + }, + + // Edge case: empty string, should return empty string + { + name: "Empty", + groups: []group{ + {5, "", ""}, + }, + }, + + // Edge case: limit is 0, should return an empty string + { + name: "Zero", + groups: []group{ + {0, "Some text", ""}, + {0, "", ""}, + }, + }, + } + + for _, tt := range tests { + for _, g := range tt.groups { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := truncate(g.limit, g.input) + assert.Equalf( + t, g.expected, got, + "input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)", + g.input, []rune(g.input), + got, []rune(got), + g.expected, []rune(g.expected), + ) + }) + } + } +} + +func BenchmarkTruncate(b *testing.B) { + run := func(limit int, input string) func(b *testing.B) { + return func(b *testing.B) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + var out string + for pb.Next() { + out = truncate(limit, input) + } + _ = out + }) + } + } + b.Run("Unlimited", run(-1, "hello 😊 world 🌍🚀")) + b.Run("Zero", run(0, "Some text")) + b.Run("Short", run(10, "Short Text")) + b.Run("ASCII", run(5, "Hello, World!")) + b.Run("ValidUTF-8", run(10, "hello 😊 world 🌍🚀")) + b.Run("InvalidUTF-8", run(6, "€"[0:2]+"hello€€")) + b.Run("MixedUTF-8", run(14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80")) +} + func TestSpanConcurrentSafe(t *testing.T) { t.Parallel()