From c66abb3245c5eef847b4ad6b0169f75b74723c19 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Wed, 29 Jun 2022 23:36:18 +0100 Subject: [PATCH] tracing: remove panic in Finish In #81079, we added an assertion that failed if a child recording of a RecordingStructured span had more than one span recording. However, this is problematic for couple of reasons: 1) The assertion was on a code path shared with RecordingVerbose spans; and, 2) A RecordingStructured span can have a RecordingVerbose child. The RecordingVerbose child is likely to have more than one span recording. As a result, we have seen roachtests failing with panic: RecordingStructured has 12 recordings; expected 1 Here, we remove the assertion. Release note: None --- pkg/util/tracing/crdbspan.go | 11 +++---- pkg/util/tracing/span_test.go | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index bd946cf1d04a..affa37cec68c 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -546,12 +546,11 @@ func (s *crdbSpan) recordFinishedChildrenLocked(childRecording tracingpb.Recordi // records by falling through. fallthrough case tracingpb.RecordingStructured: - if len(childRecording) != 1 { - panic(fmt.Sprintf("RecordingStructured has %d recordings; expected 1", len(childRecording))) - } - - for i := range rootChild.StructuredRecords { - s.recordInternalLocked(&rootChild.StructuredRecords[i], &s.mu.recording.structured) + for ci := range childRecording { + child := &childRecording[ci] + for i := range child.StructuredRecords { + s.recordInternalLocked(&child.StructuredRecords[i], &s.mu.recording.structured) + } } case tracingpb.RecordingOff: break diff --git a/pkg/util/tracing/span_test.go b/pkg/util/tracing/span_test.go index a607ba6d6d22..6a098e0204f3 100644 --- a/pkg/util/tracing/span_test.go +++ b/pkg/util/tracing/span_test.go @@ -410,6 +410,63 @@ func TestRecordingMaxSpans(t *testing.T) { require.Len(t, root.StructuredRecords, extraChildren) } +// TestRecordingDowngradesToStructuredIfTooBig finishes a span that has reached +// the maximum number of recorded spans and asserts that its structured +// recordings are correctly added to the parent. +func TestRecordingDowngradesToStructuredIfTooBig(t *testing.T) { + now := timeutil.Now() + clock := timeutil.NewManualTime(now) + tr := NewTracerWithOpt(context.Background(), WithTestingKnobs(TracerTestingKnobs{Clock: clock})) + + s1 := tr.StartSpan("p", WithRecording(tracingpb.RecordingVerbose)) + s2 := tr.StartSpan("c", WithParent(s1)) + extraChildren := 10 + numChildren := maxRecordedSpansPerTrace + extraChildren + payload := &types.Int32Value{Value: int32(1)} + for i := 0; i < numChildren; i++ { + child := tr.StartSpan(fmt.Sprintf("cc%d", i), WithParent(s2)) + child.RecordStructured(payload) + child.Finish() + } + + // We expect recordings from sp and up to the maximum number of spans and + // structured records from all spans over the max. + rec := s2.FinishAndGetConfiguredRecording() + require.Len(t, rec, maxRecordedSpansPerTrace+1) + require.Len(t, rec[0].StructuredRecords, extraChildren) + + pl, err := types.MarshalAny(payload) + require.NoError(t, err) + structuredRecordSize := (&tracingpb.StructuredRecord{Time: now, Payload: pl}).MemorySize() + maxNumStructuredRecordings := maxStructuredBytesPerSpan / structuredRecordSize + if maxNumStructuredRecordings > numChildren { + maxNumStructuredRecordings = numChildren + } + + // Since s2's child count exceeded the maximum, we don't expect to see any of + // its span recordings in s1. But, we should only see as many of s2's + // structured recordings as possible. + rec2 := s1.FinishAndGetConfiguredRecording() + require.Len(t, rec2, 1) + require.Len(t, rec2[0].StructuredRecords, maxNumStructuredRecordings) +} + +// Test that a RecordingStructured parent does not panic when asked to ingest a +// remote verbose recording. Ingesting a recording of different type is unusual, +// since children are created with the parent's recording mode, but it can +// happen if the child's recording mode was changed dynamically. +func TestRemoteSpanWithDifferentRecordingMode(t *testing.T) { + tr := NewTracer() + s1 := tr.StartSpan("p", WithRecording(tracingpb.RecordingStructured)) + s2 := tr.StartSpan("c", WithRemoteParentFromSpanMeta(s1.Meta()), WithRecording(tracingpb.RecordingVerbose)) + s3 := tr.StartSpan("cc", WithParent(s2), WithRecording(tracingpb.RecordingVerbose)) + s3.Finish() + r := s2.FinishAndGetConfiguredRecording() + require.NotPanics(t, func() { s1.ImportRemoteRecording(r) }) + r2 := s1.FinishAndGetConfiguredRecording() + require.Len(t, r2, 1) +} + type explodyNetTr struct { trace.Trace }