From a19073aebcddd3e1b0459039ee92be72b00deac5 Mon Sep 17 00:00:00 2001
From: Tyler Yahn <codingalias@gmail.com>
Date: Sun, 24 Nov 2024 08:58:27 -0800
Subject: [PATCH] Add attribute value limits

Include tests for span attribute limits.
---
 CHANGELOG.md     |   1 +
 sdk/span.go      |  82 +++++++++++++++++-
 sdk/span_test.go | 213 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b216387d..ce56b8cf2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ OpenTelemetry Go Automatic Instrumentation adheres to [Semantic Versioning](http
 ### Added
 
 - Add support for span attribute limits to `go.opentelemtry.io/auto/sdk`. ([#1315](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1315))
+- Add support for attribute value limits to `go.opentelemtry.io/auto/sdk`. ([#1325](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1325))
 
 ## [v0.18.0-alpha] - 2024-11-20
 
diff --git a/sdk/span.go b/sdk/span.go
index e81a13014..90d6e6373 100644
--- a/sdk/span.go
+++ b/sdk/span.go
@@ -8,9 +8,11 @@ import (
 	"fmt"
 	"reflect"
 	"runtime"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
+	"unicode/utf8"
 
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/codes"
@@ -159,7 +161,8 @@ func convAttrValue(value attribute.Value) telemetry.Value {
 	case attribute.FLOAT64:
 		return telemetry.Float64Value(value.AsFloat64())
 	case attribute.STRING:
-		return telemetry.StringValue(value.AsString())
+		v := truncate(maxSpan.AttrValueLen, value.AsString())
+		return telemetry.StringValue(v)
 	case attribute.BOOLSLICE:
 		slice := value.AsBoolSlice()
 		out := make([]telemetry.Value, 0, len(slice))
@@ -185,6 +188,7 @@ func convAttrValue(value attribute.Value) telemetry.Value {
 		slice := value.AsStringSlice()
 		out := make([]telemetry.Value, 0, len(slice))
 		for _, v := range slice {
+			v = truncate(maxSpan.AttrValueLen, v)
 			out = append(out, telemetry.StringValue(v))
 		}
 		return telemetry.SliceValue(out...)
@@ -192,6 +196,82 @@ func convAttrValue(value attribute.Value) telemetry.Value {
 	return telemetry.Value{}
 }
 
+// truncate returns a truncated version of s such that it contains less than
+// the limit number of characters. Truncation is applied by returning the limit
+// number of valid characters contained in s.
+//
+// If limit is negative, it returns the original string.
+//
+// UTF-8 is supported. When truncating, all invalid characters are dropped
+// before applying truncation.
+//
+// If s already contains less than the limit number of bytes, it is returned
+// unchanged. No invalid characters are removed.
+func truncate(limit int, s string) string {
+	// This prioritize performance in the following order based on the most
+	// common expected use-cases.
+	//
+	//  - Short values less than the default limit (128).
+	//  - Strings with valid encodings that exceed the limit.
+	//  - No limit.
+	//  - Strings with invalid encodings that exceed the limit.
+	if limit < 0 || len(s) <= limit {
+		return s
+	}
+
+	// Optimistically, assume all valid UTF-8.
+	var b strings.Builder
+	count := 0
+	for i, c := range s {
+		if c != utf8.RuneError {
+			count++
+			if count > limit {
+				return s[:i]
+			}
+			continue
+		}
+
+		_, size := utf8.DecodeRuneInString(s[i:])
+		if size == 1 {
+			// Invalid encoding.
+			b.Grow(len(s) - 1)
+			_, _ = b.WriteString(s[:i])
+			s = s[i:]
+			break
+		}
+	}
+
+	// Fast-path, no invalid input.
+	if b.Cap() == 0 {
+		return s
+	}
+
+	// Truncate while validating UTF-8.
+	for i := 0; i < len(s) && count < limit; {
+		c := s[i]
+		if c < utf8.RuneSelf {
+			// Optimization for single byte runes (common case).
+			_ = b.WriteByte(c)
+			i++
+			count++
+			continue
+		}
+
+		_, size := utf8.DecodeRuneInString(s[i:])
+		if size == 1 {
+			// We checked for all 1-byte runes above, this is a RuneError.
+			i++
+			continue
+		}
+
+		_, _ = b.WriteString(s[i : i+size])
+		i += size
+		count++
+	}
+
+	return b.String()
+}
+
 func (s *span) End(opts ...trace.SpanEndOption) {
 	if s == nil || !s.sampled.Swap(false) {
 		return
diff --git a/sdk/span_test.go b/sdk/span_test.go
index e855aa4b4..0a077ea25 100644
--- a/sdk/span_test.go
+++ b/sdk/span_test.go
@@ -494,6 +494,75 @@ func TestSpanAttributeLimits(t *testing.T) {
 	}
 }
 
+func TestSpanAttributeValueLimits(t *testing.T) {
+	value := "hello world"
+
+	aStr := attribute.String("string", value)
+	aStrSlice := attribute.StringSlice("slice", []string{value, value})
+
+	eq := func(a, b []telemetry.Attr) bool {
+		if len(a) != len(b) {
+			return false
+		}
+		for i := range a {
+			if !a[i].Equal(b[i]) {
+				return false
+			}
+		}
+		return true
+	}
+
+	tests := []struct {
+		limit int
+		want  string
+	}{
+		{0, ""},
+		{2, value[:2]},
+		{11, value},
+		{-1, value},
+	}
+	for _, test := range tests {
+		t.Run("Limit/"+strconv.Itoa(test.limit), func(t *testing.T) {
+			orig := maxSpan.AttrValueLen
+			maxSpan.AttrValueLen = test.limit
+			t.Cleanup(func() { maxSpan.AttrValueLen = orig })
+
+			builder := spanBuilder{}
+
+			want := []telemetry.Attr{
+				telemetry.String("string", test.want),
+				telemetry.Slice(
+					"slice",
+					telemetry.StringValue(test.want),
+					telemetry.StringValue(test.want),
+				),
+			}
+
+			s := builder.Build()
+			s.SetAttributes(aStr, aStrSlice)
+			assert.Truef(t, eq(want, s.span.Attrs), "set span attributes: got %#v, want %#v", s.span.Attrs, want)
+
+			s.AddEvent("test", trace.WithAttributes(aStr, aStrSlice))
+			assert.Truef(t, eq(want, s.span.Events[0].Attrs), "span event attributes: got %#v, want %#v", s.span.Events[0].Attrs, want)
+
+			s.AddLink(trace.Link{
+				Attributes: []attribute.KeyValue{aStr, aStrSlice},
+			})
+			assert.Truef(t, eq(want, s.span.Links[0].Attrs), "span link attributes: got %#v, want %#v", s.span.Links[0].Attrs, want)
+
+			builder.Options = []trace.SpanStartOption{
+				trace.WithAttributes(aStr, aStrSlice),
+				trace.WithLinks(trace.Link{
+					Attributes: []attribute.KeyValue{aStr, aStrSlice},
+				}),
+			}
+			s = builder.Build()
+			assert.Truef(t, eq(want, s.span.Attrs), "new span attributes: got %#v, want %#v", s.span.Attrs, want)
+			assert.Truef(t, eq(want, s.span.Links[0].Attrs), "new span link attributes: got %#v, want %#v", s.span.Attrs, want)
+		})
+	}
+}
+
 func TestSpanTracerProvider(t *testing.T) {
 	var s span
 
@@ -522,6 +591,150 @@ func (b spanBuilder) Build() *span {
 	return s
 }
 
+func TestTruncate(t *testing.T) {
+	type group struct {
+		limit    int
+		input    string
+		expected string
+	}
+
+	tests := []struct {
+		name   string
+		groups []group
+	}{
+		// Edge case: limit is negative, no truncation should occur
+		{
+			name: "NoTruncation",
+			groups: []group{
+				{-1, "No truncation!", "No truncation!"},
+			},
+		},
+
+		// Edge case: string is already shorter than the limit, no truncation
+		// should occur
+		{
+			name: "ShortText",
+			groups: []group{
+				{10, "Short text", "Short text"},
+				{15, "Short text", "Short text"},
+				{100, "Short text", "Short text"},
+			},
+		},
+
+		// Edge case: truncation happens with ASCII characters only
+		{
+			name: "ASCIIOnly",
+			groups: []group{
+				{1, "Hello World!", "H"},
+				{5, "Hello World!", "Hello"},
+				{12, "Hello World!", "Hello World!"},
+			},
+		},
+
+		// Truncation including multi-byte characters (UTF-8)
+		{
+			name: "ValidUTF-8",
+			groups: []group{
+				{7, "Hello, 世界", "Hello, "},
+				{8, "Hello, 世界", "Hello, 世"},
+				{2, "こんにちは", "こん"},
+				{3, "こんにちは", "こんに"},
+				{5, "こんにちは", "こんにちは"},
+				{12, "こんにちは", "こんにちは"},
+			},
+		},
+
+		// Truncation with invalid UTF-8 characters
+		{
+			name: "InvalidUTF-8",
+			groups: []group{
+				{11, "Invalid\x80text", "Invalidtext"},
+				// Do not modify invalid text if equal to limit.
+				{11, "Valid text\x80", "Valid text\x80"},
+				// Do not modify invalid text if under limit.
+				{15, "Valid text\x80", "Valid text\x80"},
+				{5, "Hello\x80World", "Hello"},
+				{11, "Hello\x80World\x80!", "HelloWorld!"},
+				{15, "Hello\x80World\x80Test", "HelloWorldTest"},
+				{15, "Hello\x80\x80\x80World\x80Test", "HelloWorldTest"},
+				{15, "\x80\x80\x80Hello\x80\x80\x80World\x80Test\x80\x80", "HelloWorldTest"},
+			},
+		},
+
+		// Truncation with mixed validn and invalid UTF-8 characters
+		{
+			name: "MixedUTF-8",
+			groups: []group{
+				{6, "€"[0:2] + "hello€€", "hello€"},
+				{6, "€" + "€"[0:2] + "hello", "€hello"},
+				{11, "Valid text\x80📜", "Valid text📜"},
+				{11, "Valid text📜\x80", "Valid text📜"},
+				{14, "😊 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
+				{14, "😊\x80 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
+				{14, "😊\x80 Hello\x80World🌍\x80🚀", "😊 HelloWorld🌍🚀"},
+				{14, "😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
+				{14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
+			},
+		},
+
+		// Edge case: empty string, should return empty string
+		{
+			name: "Empty",
+			groups: []group{
+				{5, "", ""},
+			},
+		},
+
+		// Edge case: limit is 0, should return an empty string
+		{
+			name: "Zero",
+			groups: []group{
+				{0, "Some text", ""},
+				{0, "", ""},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		for _, g := range tt.groups {
+			t.Run(tt.name, func(t *testing.T) {
+				t.Parallel()
+
+				got := truncate(g.limit, g.input)
+				assert.Equalf(
+					t, g.expected, got,
+					"input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)",
+					g.input, []rune(g.input),
+					got, []rune(got),
+					g.expected, []rune(g.expected),
+				)
+			})
+		}
+	}
+}
+
+func BenchmarkTruncate(b *testing.B) {
+	run := func(limit int, input string) func(b *testing.B) {
+		return func(b *testing.B) {
+			b.ReportAllocs()
+			b.RunParallel(func(pb *testing.PB) {
+				var out string
+				for pb.Next() {
+					out = truncate(limit, input)
+				}
+				_ = out
+			})
+		}
+	}
+	b.Run("Unlimited", run(-1, "hello 😊 world 🌍🚀"))
+	b.Run("Zero", run(0, "Some text"))
+	b.Run("Short", run(10, "Short Text"))
+	b.Run("ASCII", run(5, "Hello, World!"))
+	b.Run("ValidUTF-8", run(10, "hello 😊 world 🌍🚀"))
+	b.Run("InvalidUTF-8", run(6, "€"[0:2]+"hello€€"))
+	b.Run("MixedUTF-8", run(14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80"))
+}
+
 func TestSpanConcurrentSafe(t *testing.T) {
 	t.Parallel()