Skip to content

Commit

Permalink
tracing: make full redaction of trace messages configurable
Browse files Browse the repository at this point in the history
When we discovered that tenants could "trace into" KV, we considered
this a security problem since no mechanism ensured that tenants could
receive data they are allowed to see. The KV team reacted to this with
them at the KV-Tenant boundary.

Serverless has shipped with this redactability in place in order to
prevent data leakage across the boundary with KV and now needs to get
back on the `release-21.2` branch to sync with the rest of development.

Unfortunately, making traces redactable had a [significant] performance
overhead. On simple selects, this overhead is in the 10-20% area (when
verbose tracing is enabled).

This presents a stability risk, since some customers run with verbose
tracing enabled for all queries. Once they switch to 21.2, their system
may be underprovisioned as a result of the newly grown tracing overhead
in the `log.{Ve,E}vent{,f}` family of methods. The peformance hit comes
via the additional costs of redacting log strings via the
`redact.Sprintf` function.

This PR changes makes trace redactability a choice. Because we needed to
support mixed-version clusters in cockroachdb#70562 this is not as big a lift as
might have been expected.

The approach taken is that tracers and the spans they generate can be
marked as "redactable". If the flag is enabled, logging events to the
span will use full redaction via `redact.Sprintf`. If the flag is not
enabled a coarse redation of the entire message is done instead. A
further commit will enable redactability on tenant-bound traces since we
do want to maintain that functionality in multi-tenant deployments.

Original issue investigation that kicked off concerns about performance
impact of redaction:
cockroachdb#70110 (comment)

Touches cockroachdb#70562.

Release note: None
  • Loading branch information
tbg authored and dhartunian committed Dec 9, 2021
1 parent 38eace3 commit b21a10a
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 16 deletions.
32 changes: 30 additions & 2 deletions pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,25 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/stretchr/testify/require"
)

// TestTenantTracesAreRedacted is an end-to-end version of
// `kvserver.TestMaybeRedactRecording`.
func TestTenantTracesAreRedacted(t *testing.T) {
defer leaktest.AfterTest(t)()
testutils.RunTrueAndFalse(t, "redactable", func(t *testing.T, redactable bool) {
testTenantTracesAreRedactedImpl(t, redactable)
})
}

func testTenantTracesAreRedactedImpl(t *testing.T, redactable bool) {
defer log.Scope(t).Close(t)
ctx := context.Background()

Expand Down Expand Up @@ -61,6 +69,7 @@ SET tracing = off;
var args base.TestClusterArgs
args.ServerArgs.Knobs.Store = knobs
tc := serverutils.StartNewTestCluster(t, 1, args)
tc.Server(0).TracerI().(*tracing.Tracer).SetRedactable(redactable)
defer tc.Stopper().Stop(ctx)

t.Run("system-tenant", func(t *testing.T) {
Expand Down Expand Up @@ -88,7 +97,9 @@ SET tracing = off;
defer tenDB.Close()
results := getTrace(t, tenDB)

const redactedMarkerString = "verbose trace message redacted"
var found bool
var foundRedactedMarker bool
for _, sl := range results {
for _, s := range sl {
if strings.Contains(s, sensitiveString) {
Expand All @@ -100,9 +111,26 @@ SET tracing = off;
if strings.Contains(s, visibleString) {
found = true
}
if strings.Contains(s, redactedMarkerString) {
foundRedactedMarker = true
}
}
}
require.True(t, found, "trace for tenant missing trace message '%q':\n%s",
visibleString, sqlutils.MatrixToStr(results))

if redactable {
// If redaction was on, we expect the tenant to see safe information in its
// trace.
require.True(t, found, "did not see expected trace message '%q':\n%s",
visibleString, sqlutils.MatrixToStr(results))
require.False(t, foundRedactedMarker, "unexpectedly found '%q':\n%s",
redactedMarkerString, sqlutils.MatrixToStr(results))
} else {
// Otherwise, expect the opposite: not even safe information makes it through,
// because it gets replaced with foundRedactedMarker.
require.False(t, found, "unexpectedly saw message '%q':\n%s",
visibleString, sqlutils.MatrixToStr(results))
require.True(t, foundRedactedMarker, "not not find expected message '%q':\n%s",
redactedMarkerString, sqlutils.MatrixToStr(results))
}
})
}
2 changes: 1 addition & 1 deletion pkg/server/node_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"github.com/cockroachdb/redact"
)

var sRedactedMarker = redact.RedactableString(redact.EscapeBytes(nil))
const sRedactedMarker = redact.RedactableString("verbose trace message redacted")

// redactRecordingForTenant redacts the sensitive parts of log messages in the
// recording if the tenant to which this recording is intended is not the system
Expand Down
5 changes: 3 additions & 2 deletions pkg/server/node_tenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ func TestRedactRecordingForTenant(t *testing.T) {
Add("tag_sensitive", tagSensitive).
Add("tag_not_sensitive", log.Safe(tagNotSensitive))
ctx := logtags.WithTags(context.Background(), tags)
ctx, sp := tracing.NewTracer().StartSpanCtx(ctx, "foo", tracing.WithRecording(tracing.RecordingVerbose))

tracer := tracing.NewTracer()
tracer.SetRedactable(true)
ctx, sp := tracer.StartSpanCtx(ctx, "foo", tracing.WithRecording(tracing.RecordingVerbose))
log.Eventf(ctx, "%s %s", msgSensitive, log.Safe(msgNotSensitive))
sp.SetTag("all_span_tags_are_stripped", attribute.StringValue("because_no_redactability"))
rec := sp.FinishAndGetRecording(tracing.RecordingVerbose)
Expand Down
34 changes: 33 additions & 1 deletion pkg/util/log/log_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"
"os"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -129,8 +130,39 @@ func (e *logEntry) SafeFormat(w interfaces.SafePrinter, _ rune) {
}
}

// String is a faster implementation than `SafeFormat` which is why we
// don't follow the usual convention of implementing `String` via a call
// to `redact.StringWithoutMarkers()`. This implementation is still
// around because it sits in the hot path of verbose tracing.
func (e *logEntry) String() string {
return redact.StringWithoutMarkers(e)
entry := e.convertToLegacy()
if len(entry.Tags) == 0 && len(entry.File) == 0 && !entry.Redactable {
// Shortcut.
return entry.Message
}

var buf strings.Builder
if len(entry.File) != 0 {
buf.WriteString(entry.File)
buf.WriteByte(':')
buf.WriteString(strconv.FormatInt(entry.Line, 10))
buf.WriteByte(' ')
}
if len(entry.Tags) > 0 {
buf.WriteByte('[')
buf.WriteString(entry.Tags)
buf.WriteString("] ")
}
buf.WriteString(entry.Message)
msg := buf.String()

if entry.Redactable {
// This is true when eventInternal is called from logfDepth(),
// ie. a regular log call. In this case, the tags and message may contain
// redaction markers. We remove them here.
msg = redact.RedactableString(msg).StripMarkers()
}
return msg
}

type entryPayload struct {
Expand Down
15 changes: 10 additions & 5 deletions pkg/util/log/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,13 @@ func getSpanOrEventLog(ctx context.Context) (*tracing.Span, *ctxEventLog, bool)
return nil, nil, false
}

// eventInternal is the common code for logging an event to trace and/or event
// logs. All entries passed to this method should be redactable.
// eventInternal is the common code for logging an event to trace and/or
// event logs. Entries passed to this method may or may not be
// redactable (via the `logEntry.redactable` flag). However, if
// sp.Redactable() is true then they will be printed as redactable by
// `Recordf`. I the entry is not redactable, the entirety of the message
// will be marked redactable, otherwise fine-grainer redaction will
// remain in the result.
func eventInternal(sp *tracing.Span, el *ctxEventLog, isErr bool, entry *logEntry) {
if sp != nil {
sp.Recordf("%s", entry)
Expand Down Expand Up @@ -162,7 +167,7 @@ func Event(ctx context.Context, msg string) {
severity.INFO, /* unused for trace events */
channel.DEV, /* unused for trace events */
1, /* depth */
true, /* redactable */
sp.Redactable(),
msg)
eventInternal(sp, el, false /* isErr */, &entry)
}
Expand All @@ -182,7 +187,7 @@ func Eventf(ctx context.Context, format string, args ...interface{}) {
severity.INFO, /* unused for trace events */
channel.DEV, /* unused for trace events */
1, /* depth */
true, /* redactable */
sp.Redactable(),
format, args...)
eventInternal(sp, el, false /* isErr */, &entry)
}
Expand All @@ -207,7 +212,7 @@ func vEventf(
severity.INFO, /* unused for trace events */
channel.DEV, /* unused for trace events */
depth+1,
true, /* redactable */
sp.Redactable(),
format, args...)
eventInternal(sp, el, isErr, &entry)
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/util/tracing/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ func (sp *Span) Tracer() *Tracer {
return sp.i.Tracer()
}

// Redactable returns true if this Span's tracer is marked redactable
func (sp *Span) Redactable() bool {
if sp == nil || sp.i.isNoop() {
return false
}
return sp.Tracer().Redactable()
}

// Finish idempotently marks the Span as completed (at which point it will
// silently drop any new data added to it). Finishing a nil *Span is a noop.
func (sp *Span) Finish() {
Expand Down
12 changes: 11 additions & 1 deletion pkg/util/tracing/span_inner.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package tracing

import (
"fmt"
"strings"

"github.com/cockroachdb/cockroach/pkg/util/timeutil"
Expand Down Expand Up @@ -181,7 +182,16 @@ func (s *spanInner) Recordf(format string, args ...interface{}) {
if !s.hasVerboseSink() {
return
}
str := redact.Sprintf(format, args...)
var str redact.RedactableString
if s.Tracer().Redactable() {
str = redact.Sprintf(format, args...)
} else {
// `fmt.Sprintf` when called on a logEntry will use the faster
// `logEntry.String` method instead of `logEntry.SafeFormat`.
// The additional use of `redact.Sprintf("%s",...)` is necessary
// to wrap the result in redaction markers.
str = redact.Sprintf("%s", fmt.Sprintf(format, args...))
}
if s.otelSpan != nil {
// TODO(obs-inf): depending on the situation it may be more appropriate to
// redact the string here.
Expand Down
24 changes: 24 additions & 0 deletions pkg/util/tracing/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ type Tracer struct {
// True if tracing to the debug/requests endpoint. Accessed via t.useNetTrace().
_useNetTrace int32 // updated atomically

// True if we would like spans created from this tracer to be marked
// as redactable. This will make unstructured events logged to those
// spans redactable.
// Currently, this is used to mark spans as redactable and to redact
// them at the network boundary from KV.
_redactable int32 // accessed atomically

// Pointer to an OpenTelemetry tracer used as a "shadow tracer", if any. If
// not nil, the respective *otel.Tracer will be used to create mirror spans
// for all spans that the parent Tracer creates.
Expand Down Expand Up @@ -304,6 +311,23 @@ type TracerTestingKnobs struct {
UseNetTrace bool
}

// Redactable returns true if the tracer is configured to emit
// redactable logs. This value will affect the redactability of messages
// from already open Spans.
func (t *Tracer) Redactable() bool {
return atomic.LoadInt32(&t._redactable) != 0
}

// SetRedactable changes the redactability of the Tracer. This
// affects any future trace spans created.
func (t *Tracer) SetRedactable(to bool) {
var n int32
if to {
n = 1
}
atomic.StoreInt32(&t._redactable, n)
}

// NewTracer creates a Tracer. It initially tries to run with minimal overhead
// and collects essentially nothing; use Configure() to enable various tracing
// backends.
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/tracing/tracingpb/recorded_span.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/util/tracing/tracingpb/recorded_span.proto
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ message RecordedSpan {
(gogoproto.stdduration) = true];

// RedactableLogs determines whether the verbose log messages are redactable.
// This field was introduced in the 22.1 cycle. It can be removed in the 22.2
// cycle.
// This field was introduced in the 22.1 release. It can be removed in the 22.2
// release. On 22.1 this is always set to `true`.
bool redactable_logs = 15;
// Events logged in the span.
repeated LogRecord logs = 9 [(gogoproto.nullable) = false];
Expand Down

0 comments on commit b21a10a

Please sign in to comment.