From bd6105f27b387abbfab0face4a7c00ec028763a0 Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Thu, 16 Dec 2021 02:34:09 -0800 Subject: [PATCH] [translator/jaeger] Parse and set Jaeger status as the OpenTelemetry specification defines (#6682) * Parse/set Jaeger status as OTel spec defines The OpenTelemetry specification defines the OpenTelemetry span status is communicated with Jaeger types as tags having string values of ERROR or OK. Currently, this is transmitted as an integer representation of the pdata span status value. This changes that to comply with the OpenTelemetry specification. The error tag for Jaeger spans takes precedence over the OpenTelemetry status value in defining if a span is errored. This changes the Jaeger proto to traces transformation to not overwrite the status with the OpenTelemetry status. * Add changes to changelog * Revert to just check code fits int32 * Update sapmreceiver to not rely on status bug The sapmreceiver tested for an invalid OTel status code to be passed through to the underlying span status. This behavior has been fixed and the underlying span status is no longer set to an invalid value, therefore the sapmreceiver have also been updated to expect this corrected behavior. * Update changes in changelog as breaking changes * Remove unused constant --- CHANGELOG.md | 3 + pkg/translator/jaeger/constants.go | 10 +++ .../jaeger/jaegerproto_to_traces.go | 77 ++++++++++++++----- .../jaeger/jaegerproto_to_traces_test.go | 45 ++++++----- .../jaeger/jaegerthrift_to_traces_test.go | 12 +-- .../jaeger/traces_to_jaegerproto.go | 20 +++-- .../jaeger/traces_to_jaegerproto_test.go | 18 ++--- receiver/sapmreceiver/trace_receiver_test.go | 13 +--- results/TESTRESULTS.md | 0 9 files changed, 125 insertions(+), 73 deletions(-) create mode 100644 results/TESTRESULTS.md diff --git a/CHANGELOG.md b/CHANGELOG.md index d7460dcff6f1..b080b07c8ba6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ ## 🛑 Breaking changes 🛑 - `memcachedreceiver`: Update metric names (#6594) +- `sapm` receiver: Use Jaeger status values instead of OpenCensus (#6682) +- `jaeger` receiver/exporter: Parse/set Jaeger status with OTel spec values (#6682) + ## 🚀 New components 🚀 ## 🧰 Bug fixes 🧰 diff --git a/pkg/translator/jaeger/constants.go b/pkg/translator/jaeger/constants.go index cc002e019edc..ad8df7d71011 100644 --- a/pkg/translator/jaeger/constants.go +++ b/pkg/translator/jaeger/constants.go @@ -18,7 +18,17 @@ import ( "errors" ) +// Status tag values as defined by the OpenTelemetry specification: +// https://github.com/open-telemetry/opentelemetry-specification/blob/v1.8.0/specification/trace/sdk_exporters/non-otlp.md#span-status +const ( + statusError = "ERROR" + statusOk = "OK" +) + var ( errZeroTraceID = errors.New("span has an all zeros trace ID") errZeroSpanID = errors.New("span has an all zeros span ID") + + // errType indicates that a value is not convertible to the target type. + errType = errors.New("invalid type") ) diff --git a/pkg/translator/jaeger/jaegerproto_to_traces.go b/pkg/translator/jaeger/jaegerproto_to_traces.go index 081e8210f0f9..c86d912fbd29 100644 --- a/pkg/translator/jaeger/jaegerproto_to_traces.go +++ b/pkg/translator/jaeger/jaegerproto_to_traces.go @@ -19,6 +19,7 @@ import ( "fmt" "reflect" "strconv" + "strings" "github.com/jaegertracing/jaeger/model" "go.opentelemetry.io/collector/model/pdata" @@ -217,25 +218,43 @@ func setInternalSpanStatus(attrs pdata.AttributeMap, dest pdata.SpanStatus) { statusCode = pdata.StatusCodeError attrs.Delete(tracetranslator.TagError) statusExists = true + + if desc, ok := extractStatusDescFromAttr(attrs); ok { + statusMessage = desc + } else if descAttr, ok := attrs.Get(tracetranslator.TagHTTPStatusMsg); ok { + statusMessage = descAttr.StringVal() + } } } if codeAttr, ok := attrs.Get(conventions.OtelStatusCode); ok { - statusExists = true - if code, err := getStatusCodeValFromAttr(codeAttr); err == nil { - statusCode = pdata.StatusCode(code) - attrs.Delete(conventions.OtelStatusCode) - } - if msgAttr, ok := attrs.Get(conventions.OtelStatusDescription); ok { - statusMessage = msgAttr.StringVal() - attrs.Delete(conventions.OtelStatusDescription) + if !statusExists { + // The error tag is the ultimate truth for a Jaeger spans' error + // status. Only parse the otel.status_code tag if the error tag is + // not set to true. + statusExists = true + switch strings.ToUpper(codeAttr.StringVal()) { + case statusOk: + statusCode = pdata.StatusCodeOk + case statusError: + statusCode = pdata.StatusCodeError + } + + if desc, ok := extractStatusDescFromAttr(attrs); ok { + statusMessage = desc + } } - } else if httpCodeAttr, ok := attrs.Get(conventions.AttributeHTTPStatusCode); ok { - statusExists = true + // Regardless of error tag value, remove the otel.status_code tag. The + // otel.status_message tag will have already been removed if + // statusExists is true. + attrs.Delete(conventions.OtelStatusCode) + } else if httpCodeAttr, ok := attrs.Get(conventions.AttributeHTTPStatusCode); !statusExists && ok { + // Fallback to introspecting if this span represents a failed HTTP + // request or response, but again, only do so if the `error` tag was + // not set to true and no explicit status was sent. if code, err := getStatusCodeFromHTTPStatusAttr(httpCodeAttr); err == nil { - - // Do not set status code in case it was set to Unset. if code != pdata.StatusCodeUnset { + statusExists = true statusCode = code } @@ -251,27 +270,43 @@ func setInternalSpanStatus(attrs pdata.AttributeMap, dest pdata.SpanStatus) { } } -func getStatusCodeValFromAttr(attrVal pdata.AttributeValue) (int64, error) { - var codeVal int64 +// extractStatusDescFromAttr returns the OTel status description from attrs +// along with true if it is set. Otherwise, an empty string and false are +// returned. The OTel status description attribute is deleted from attrs in +// the process. +func extractStatusDescFromAttr(attrs pdata.AttributeMap) (string, bool) { + if msgAttr, ok := attrs.Get(conventions.OtelStatusDescription); ok { + msg := msgAttr.StringVal() + attrs.Delete(conventions.OtelStatusDescription) + return msg, true + } + return "", false +} + +// codeFromAttr returns the integer code value from attrVal. An error is +// returned if the code is not represented by an integer or string value in +// the attrVal or the value is outside the bounds of an int representation. +func codeFromAttr(attrVal pdata.AttributeValue) (int64, error) { + var val int64 switch attrVal.Type() { case pdata.AttributeValueTypeInt: - codeVal = attrVal.IntVal() + val = attrVal.IntVal() case pdata.AttributeValueTypeString: - i, err := strconv.Atoi(attrVal.StringVal()) + var err error + val, err = strconv.ParseInt(attrVal.StringVal(), 10, 0) if err != nil { return 0, err } - codeVal = int64(i) default: - return 0, fmt.Errorf("invalid status code attribute type: %s", attrVal.Type().String()) + return 0, fmt.Errorf("%w: %s", errType, attrVal.Type().String()) } - return codeVal, nil + return val, nil } func getStatusCodeFromHTTPStatusAttr(attrVal pdata.AttributeValue) (pdata.StatusCode, error) { - statusCode, err := getStatusCodeValFromAttr(attrVal) + statusCode, err := codeFromAttr(attrVal) if err != nil { - return pdata.StatusCodeOk, err + return pdata.StatusCodeUnset, err } return tracetranslator.StatusCodeFromHTTP(statusCode), nil diff --git a/pkg/translator/jaeger/jaegerproto_to_traces_test.go b/pkg/translator/jaeger/jaegerproto_to_traces_test.go index 6db9e94579ab..48cb296485a6 100644 --- a/pkg/translator/jaeger/jaegerproto_to_traces_test.go +++ b/pkg/translator/jaeger/jaegerproto_to_traces_test.go @@ -16,7 +16,6 @@ package jaeger import ( "encoding/binary" - "fmt" "strconv" "testing" "time" @@ -42,9 +41,7 @@ var ( testSpanEndTimestamp = pdata.NewTimestampFromTime(testSpanEndTime) ) -func TestGetStatusCodeValFromAttr(t *testing.T) { - _, invalidNumErr := strconv.Atoi("inf") - +func TestCodeFromAttr(t *testing.T) { tests := []struct { name string attr pdata.AttributeValue @@ -55,35 +52,37 @@ func TestGetStatusCodeValFromAttr(t *testing.T) { name: "ok-string", attr: pdata.NewAttributeValueString("0"), code: 0, - err: nil, }, { name: "ok-int", attr: pdata.NewAttributeValueInt(1), code: 1, - err: nil, }, { name: "wrong-type", attr: pdata.NewAttributeValueBool(true), code: 0, - err: fmt.Errorf("invalid status code attribute type: BOOL"), + err: errType, }, { name: "invalid-string", attr: pdata.NewAttributeValueString("inf"), code: 0, - err: invalidNumErr, + err: strconv.ErrSyntax, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - code, err := getStatusCodeValFromAttr(test.attr) - assert.EqualValues(t, test.err, err) + code, err := codeFromAttr(test.attr) + if test.err != nil { + assert.ErrorIs(t, err, test.err) + } else { + assert.NoError(t, err) + } assert.Equal(t, test.code, code) }) } @@ -356,9 +355,9 @@ func TestSetInternalSpanStatus(t *testing.T) { attrsModifiedLen: 0, }, { - name: "status.code is set as int", + name: "status.code is set as string", attrs: pdata.NewAttributeMapFromMap(map[string]pdata.AttributeValue{ - conventions.OtelStatusCode: pdata.NewAttributeValueInt(1), + conventions.OtelStatusCode: pdata.NewAttributeValueString(statusOk), }), status: okStatus, attrsModifiedLen: 0, @@ -367,7 +366,7 @@ func TestSetInternalSpanStatus(t *testing.T) { name: "status.code, status.message and error tags are set", attrs: pdata.NewAttributeMapFromMap(map[string]pdata.AttributeValue{ tracetranslator.TagError: pdata.NewAttributeValueBool(true), - conventions.OtelStatusCode: pdata.NewAttributeValueInt(int64(pdata.StatusCodeError)), + conventions.OtelStatusCode: pdata.NewAttributeValueString(statusError), conventions.OtelStatusDescription: pdata.NewAttributeValueString("Error: Invalid argument"), }), status: errorStatusWithMessage, @@ -394,7 +393,7 @@ func TestSetInternalSpanStatus(t *testing.T) { { name: "status.code has precedence over http.status_code.", attrs: pdata.NewAttributeMapFromMap(map[string]pdata.AttributeValue{ - conventions.OtelStatusCode: pdata.NewAttributeValueInt(1), + conventions.OtelStatusCode: pdata.NewAttributeValueString(statusOk), conventions.AttributeHTTPStatusCode: pdata.NewAttributeValueInt(500), tracetranslator.TagHTTPStatusMsg: pdata.NewAttributeValueString("Server Error"), }), @@ -597,9 +596,9 @@ func generateProtoSpan() *model.Span { VStr: string(tracetranslator.OpenTracingSpanKindClient), }, { - Key: conventions.OtelStatusCode, - VType: model.ValueType_INT64, - VInt64: int64(pdata.StatusCodeError), + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusError, }, { Key: tracetranslator.TagError, @@ -675,9 +674,9 @@ func generateProtoSpanWithTraceState() *model.Span { VStr: string(tracetranslator.OpenTracingSpanKindClient), }, { - Key: conventions.OtelStatusCode, - VType: model.ValueType_INT64, - VInt64: int64(pdata.StatusCodeError), + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusError, }, { Key: tracetranslator.TagError, @@ -785,9 +784,9 @@ func generateProtoFollowerSpan() *model.Span { VStr: string(tracetranslator.OpenTracingSpanKindConsumer), }, { - Key: conventions.OtelStatusCode, - VType: model.ValueType_INT64, - VInt64: int64(pdata.StatusCodeOk), + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusOk, }, { Key: conventions.OtelStatusDescription, diff --git a/pkg/translator/jaeger/jaegerthrift_to_traces_test.go b/pkg/translator/jaeger/jaegerthrift_to_traces_test.go index d84e61a768bd..1383aaf60622 100644 --- a/pkg/translator/jaeger/jaegerthrift_to_traces_test.go +++ b/pkg/translator/jaeger/jaegerthrift_to_traces_test.go @@ -155,7 +155,7 @@ func generateThriftSpan() *jaeger.Span { intAttrVal := int64(123) eventName := "event-with-attr" eventStrAttrVal := "span-event-attr-val" - statusCode := int64(pdata.StatusCodeError) + statusCode := statusError statusMsg := "status-cancelled" kind := string(tracetranslator.OpenTracingSpanKindClient) @@ -196,8 +196,8 @@ func generateThriftSpan() *jaeger.Span { Tags: []*jaeger.Tag{ { Key: conventions.OtelStatusCode, - VType: jaeger.TagType_LONG, - VLong: &statusCode, + VType: jaeger.TagType_STRING, + VStr: &statusCode, }, { Key: conventions.OtelStatusDescription, @@ -243,7 +243,7 @@ func generateThriftChildSpan() *jaeger.Span { } func generateThriftFollowerSpan() *jaeger.Span { - statusCode := int64(pdata.StatusCodeOk) + statusCode := statusOk statusMsg := "status-ok" kind := string(tracetranslator.OpenTracingSpanKindConsumer) @@ -257,8 +257,8 @@ func generateThriftFollowerSpan() *jaeger.Span { Tags: []*jaeger.Tag{ { Key: conventions.OtelStatusCode, - VType: jaeger.TagType_LONG, - VLong: &statusCode, + VType: jaeger.TagType_STRING, + VStr: &statusCode, }, { Key: conventions.OtelStatusDescription, diff --git a/pkg/translator/jaeger/traces_to_jaegerproto.go b/pkg/translator/jaeger/traces_to_jaegerproto.go index 9c977c4b4255..562a071c2117 100644 --- a/pkg/translator/jaeger/traces_to_jaegerproto.go +++ b/pkg/translator/jaeger/traces_to_jaegerproto.go @@ -381,11 +381,21 @@ func getTagFromSpanKind(spanKind pdata.SpanKind) (model.KeyValue, bool) { } func getTagFromStatusCode(statusCode pdata.StatusCode) (model.KeyValue, bool) { - return model.KeyValue{ - Key: conventions.OtelStatusCode, - VInt64: int64(statusCode), - VType: model.ValueType_INT64, - }, true + switch statusCode { + case pdata.StatusCodeError: + return model.KeyValue{ + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusError, + }, true + case pdata.StatusCodeOk: + return model.KeyValue{ + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusOk, + }, true + } + return model.KeyValue{}, false } func getErrorTagFromStatusCode(statusCode pdata.StatusCode) (model.KeyValue, bool) { diff --git a/pkg/translator/jaeger/traces_to_jaegerproto_test.go b/pkg/translator/jaeger/traces_to_jaegerproto_test.go index f8680d306b5b..c5ea3b3eb302 100644 --- a/pkg/translator/jaeger/traces_to_jaegerproto_test.go +++ b/pkg/translator/jaeger/traces_to_jaegerproto_test.go @@ -37,9 +37,9 @@ func TestGetTagFromStatusCode(t *testing.T) { name: "ok", code: pdata.StatusCodeOk, tag: model.KeyValue{ - Key: conventions.OtelStatusCode, - VInt64: int64(pdata.StatusCodeOk), - VType: model.ValueType_INT64, + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusOk, }, }, @@ -47,9 +47,9 @@ func TestGetTagFromStatusCode(t *testing.T) { name: "error", code: pdata.StatusCodeError, tag: model.KeyValue{ - Key: conventions.OtelStatusCode, - VInt64: int64(pdata.StatusCodeError), - VType: model.ValueType_INT64, + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusError, }, }, } @@ -339,9 +339,9 @@ func TestInternalTracesToJaegerProtoBatchesAndBack(t *testing.T) { func generateProtoChildSpanWithErrorTags() *model.Span { span := generateProtoChildSpan() span.Tags = append(span.Tags, model.KeyValue{ - Key: conventions.OtelStatusCode, - VType: model.ValueType_INT64, - VInt64: int64(pdata.StatusCodeError), + Key: conventions.OtelStatusCode, + VType: model.ValueType_STRING, + VStr: statusError, }) span.Tags = append(span.Tags, model.KeyValue{ Key: tracetranslator.TagError, diff --git a/receiver/sapmreceiver/trace_receiver_test.go b/receiver/sapmreceiver/trace_receiver_test.go index 97af41e1c24b..5b9759ad446e 100644 --- a/receiver/sapmreceiver/trace_receiver_test.go +++ b/receiver/sapmreceiver/trace_receiver_test.go @@ -29,7 +29,6 @@ import ( "github.com/signalfx/sapm-proto/sapmprotocol" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "go.opencensus.io/trace" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/config/confighttp" @@ -63,9 +62,7 @@ func expectedTraceData(t1, t2, t3 time.Time) pdata.Traces { span0.SetName("DBSearch") span0.SetStartTimestamp(pdata.NewTimestampFromTime(t1)) span0.SetEndTimestamp(pdata.NewTimestampFromTime(t2)) - // Set invalid status code that is not with the valid list of value. - // This will be set from incoming invalid code. - span0.Status().SetCode(trace.StatusCodeNotFound) + span0.Status().SetCode(pdata.StatusCodeError) span0.Status().SetMessage("Stale indices") span1 := spans.AppendEmpty() @@ -74,9 +71,7 @@ func expectedTraceData(t1, t2, t3 time.Time) pdata.Traces { span1.SetName("ProxyFetch") span1.SetStartTimestamp(pdata.NewTimestampFromTime(t2)) span1.SetEndTimestamp(pdata.NewTimestampFromTime(t3)) - // Set invalid status code that is not with the valid list of value. - // This will be set from incoming invalid code. - span1.Status().SetCode(trace.StatusCodeInternal) + span1.Status().SetCode(pdata.StatusCodeError) span1.Status().SetMessage("Frontend crash") return traces @@ -106,7 +101,7 @@ func grpcFixture(t1 time.Time) *model.Batch { Duration: 10 * time.Minute, Tags: []model.KeyValue{ model.String(conventions.OtelStatusDescription, "Stale indices"), - model.Int64(conventions.OtelStatusCode, trace.StatusCodeNotFound), + model.String(conventions.OtelStatusCode, "ERROR"), model.Bool("error", true), }, References: []model.SpanRef{ @@ -125,7 +120,7 @@ func grpcFixture(t1 time.Time) *model.Batch { Duration: 2 * time.Second, Tags: []model.KeyValue{ model.String(conventions.OtelStatusDescription, "Frontend crash"), - model.Int64(conventions.OtelStatusCode, trace.StatusCodeInternal), + model.String(conventions.OtelStatusCode, "ERROR"), model.Bool("error", true), }, }, diff --git a/results/TESTRESULTS.md b/results/TESTRESULTS.md new file mode 100644 index 000000000000..e69de29bb2d1