Skip to content

Commit

Permalink
Add support for sampling rate in streamlog (vitessio#15919)
Browse files Browse the repository at this point in the history
Signed-off-by: Tim Vaillancourt <[email protected]>
  • Loading branch information
timvaillancourt committed Jul 18, 2024
1 parent 48ba53a commit 19844b2
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 0 deletions.
1 change: 1 addition & 0 deletions go/flags/endtoend/vtcombo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ Flags:
--querylog-filter-tag string string that must be present in the query for it to be logged; if using a value as the tag, you need to disable query normalization
--querylog-format string format for query logs ("text" or "json") (default "text")
--querylog-row-threshold uint Number of rows a query has to return or affect before being logged; not useful for streaming queries. 0 means all queries will be logged.
--querylog-sample-rate float Sample rate for logging queries. Value must be between 0.0 (no logging) and 1.0 (all queries)
--queryserver-config-acl-exempt-acl string an acl that exempt from table acl checking (this acl is free to access any vitess tables).
--queryserver-config-annotate-queries prefix queries to MySQL backend with comment indicating vtgate principal (user) and target tablet type
--queryserver-config-enable-table-acl-dry-run If this flag is enabled, tabletserver will emit monitoring metrics and let the request pass regardless of table acl check results
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vtgate.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ Flags:
--querylog-filter-tag string string that must be present in the query for it to be logged; if using a value as the tag, you need to disable query normalization
--querylog-format string format for query logs ("text" or "json") (default "text")
--querylog-row-threshold uint Number of rows a query has to return or affect before being logged; not useful for streaming queries. 0 means all queries will be logged.
--querylog-sample-rate float Sample rate for logging queries. Value must be between 0.0 (no logging) and 1.0 (all queries)
--redact-debug-ui-queries redact full queries and bind variables from debug UI
--remote_operation_timeout duration time to wait for a remote operation (default 15s)
--retry-count int retry count (default 2)
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ Flags:
--querylog-filter-tag string string that must be present in the query for it to be logged; if using a value as the tag, you need to disable query normalization
--querylog-format string format for query logs ("text" or "json") (default "text")
--querylog-row-threshold uint Number of rows a query has to return or affect before being logged; not useful for streaming queries. 0 means all queries will be logged.
--querylog-sample-rate float Sample rate for logging queries. Value must be between 0.0 (no logging) and 1.0 (all queries)
--queryserver-config-acl-exempt-acl string an acl that exempt from table acl checking (this acl is free to access any vitess tables).
--queryserver-config-annotate-queries prefix queries to MySQL backend with comment indicating vtgate principal (user) and target tablet type
--queryserver-config-enable-table-acl-dry-run If this flag is enabled, tabletserver will emit monitoring metrics and let the request pass regardless of table acl check results
Expand Down
21 changes: 21 additions & 0 deletions go/streamlog/streamlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package streamlog
import (
"fmt"
"io"
rand "math/rand/v2"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -51,6 +52,7 @@ var (
queryLogFilterTag string
queryLogRowThreshold uint64
queryLogFormat = "text"
queryLogSampleRate float64
)

func GetRedactDebugUIQueries() bool {
Expand All @@ -69,6 +71,10 @@ func SetQueryLogRowThreshold(newQueryLogRowThreshold uint64) {
queryLogRowThreshold = newQueryLogRowThreshold
}

// SetQueryLogSampleRate overwrites the package-level queryLogSampleRate,
// the value shouldSampleQuery consults when deciding whether to sample a
// query. The value is stored as given; no range check is applied here.
func SetQueryLogSampleRate(sampleRate float64) {
queryLogSampleRate = sampleRate
}

// GetQueryLogFormat returns the current value of the package-level
// queryLogFormat variable, which is initialized to "text".
func GetQueryLogFormat() string {
return queryLogFormat
}
Expand Down Expand Up @@ -96,6 +102,8 @@ func registerStreamLogFlags(fs *pflag.FlagSet) {
// QueryLogRowThreshold only log queries returning or affecting this many rows
fs.Uint64Var(&queryLogRowThreshold, "querylog-row-threshold", queryLogRowThreshold, "Number of rows a query has to return or affect before being logged; not useful for streaming queries. 0 means all queries will be logged.")

// QueryLogSampleRate causes a sample of queries to be logged
fs.Float64Var(&queryLogSampleRate, "querylog-sample-rate", queryLogSampleRate, "Sample rate for logging queries. Value must be between 0.0 (no logging) and 1.0 (all queries)")
}

const (
Expand Down Expand Up @@ -249,9 +257,22 @@ func GetFormatter[T any](logger *StreamLogger[T]) LogFormatter {
}
}

// shouldSampleQuery reports whether the current query falls into the random
// sample configured by queryLogSampleRate.
//
// A rate at or below 0 never samples and a rate at or above 1 always
// samples, so out-of-range values are clamped rather than rejected. For a
// rate strictly between 0 and 1 the decision is random, with the probability
// of returning true equal to the rate.
func shouldSampleQuery() bool {
	switch {
	case queryLogSampleRate <= 0:
		return false
	case queryLogSampleRate >= 1:
		return true
	}
	// rand.Float64 is uniform over the half-open interval [0.0, 1.0), so the
	// comparison must be strict for the hit probability to equal the rate;
	// "<=" would over-sample by one representable value.
	return rand.Float64() < queryLogSampleRate
}

// ShouldEmitLog returns whether the log with the given SQL query
// should be emitted or filtered
func ShouldEmitLog(sql string, rowsAffected, rowsReturned uint64) bool {
if shouldSampleQuery() {
return true
}
if queryLogRowThreshold > max(rowsAffected, rowsReturned) && queryLogFilterTag == "" {
return false
}
Expand Down
131 changes: 131 additions & 0 deletions go/streamlog/streamlog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/vt/servenv"
)

Expand Down Expand Up @@ -260,3 +263,131 @@ func TestFile(t *testing.T) {
t.Errorf("streamlog file: want %q got %q", want, got)
}
}

// TestShouldSampleQuery checks the boundary behavior of shouldSampleQuery:
// rates at or below 0 must never sample and rates at or above 1 must always
// sample. The in-between case is only executed, not asserted, because its
// result is random.
func TestShouldSampleQuery(t *testing.T) {
	// queryLogSampleRate is package-level state. Restore it on exit so that
	// later tests in this package do not inherit the last value set here.
	origQueryLogSampleRate := queryLogSampleRate
	defer func() {
		queryLogSampleRate = origQueryLogSampleRate
	}()

	queryLogSampleRate = -1
	assert.False(t, shouldSampleQuery())

	queryLogSampleRate = 0
	assert.False(t, shouldSampleQuery())

	// for test coverage, can't test a random result
	queryLogSampleRate = 0.5
	shouldSampleQuery()

	queryLogSampleRate = 1.0
	assert.True(t, shouldSampleQuery())

	queryLogSampleRate = 100.0
	assert.True(t, shouldSampleQuery())
}

// TestShouldEmitLog runs ShouldEmitLog against a table of filter-tag,
// row-threshold and sample-rate settings and checks the emit decision for
// each. The sql string of every case doubles as the subtest name.
func TestShouldEmitLog(t *testing.T) {
	// Snapshot the package-level settings this test overrides and put them
	// back once all cases have run.
	savedFilterTag := queryLogFilterTag
	savedRowThreshold := queryLogRowThreshold
	savedSampleRate := queryLogSampleRate
	defer func() {
		SetQueryLogFilterTag(savedFilterTag)
		SetQueryLogRowThreshold(savedRowThreshold)
		SetQueryLogSampleRate(savedSampleRate)
	}()

	type emitCase struct {
		sql          string
		filterTag    string
		rowThreshold uint64
		sampleRate   float64
		rowsAffected uint64
		rowsReturned uint64
		wantEmit     bool
	}

	cases := []emitCase{
		{sql: "queryLogThreshold smaller than affected and returned", filterTag: "", rowThreshold: 2, sampleRate: 0.0, rowsAffected: 7, rowsReturned: 7, wantEmit: true},
		{sql: "queryLogThreshold greater than affected and returned", filterTag: "", rowThreshold: 27, sampleRate: 0.0, rowsAffected: 7, rowsReturned: 17, wantEmit: false},
		{sql: "this doesn't contains queryFilterTag: TAG", filterTag: "special tag", rowThreshold: 10, sampleRate: 0.0, rowsAffected: 7, rowsReturned: 17, wantEmit: false},
		{sql: "this contains queryFilterTag: TAG", filterTag: "TAG", rowThreshold: 0, sampleRate: 0.0, rowsAffected: 7, rowsReturned: 17, wantEmit: true},
		{sql: "this contains querySampleRate: 1.0", filterTag: "", rowThreshold: 0, sampleRate: 1.0, rowsAffected: 7, rowsReturned: 17, wantEmit: true},
		{sql: "this contains querySampleRate: 1.0 without expected queryFilterTag", filterTag: "TAG", rowThreshold: 0, sampleRate: 1.0, rowsAffected: 7, rowsReturned: 17, wantEmit: true},
	}

	for _, tc := range cases {
		t.Run(tc.sql, func(t *testing.T) {
			// Apply this case's configuration before asking for a decision.
			SetQueryLogFilterTag(tc.filterTag)
			SetQueryLogRowThreshold(tc.rowThreshold)
			SetQueryLogSampleRate(tc.sampleRate)

			gotEmit := ShouldEmitLog(tc.sql, tc.rowsAffected, tc.rowsReturned)
			require.Equal(t, tc.wantEmit, gotEmit)
		})
	}
}

func BenchmarkShouldEmitLog(b *testing.B) {
b.Run("default", func(b *testing.B) {
SetQueryLogSampleRate(0.0)
for i := 0; i < b.N; i++ {
ShouldEmitLog("select * from test where user='someone'", 0, 123)
}
})
b.Run("filter_tag", func(b *testing.B) {
SetQueryLogSampleRate(0.0)
SetQueryLogFilterTag("LOG_QUERY")
defer SetQueryLogFilterTag("")
for i := 0; i < b.N; i++ {
ShouldEmitLog("select /* LOG_QUERY=1 */ * from test where user='someone'", 0, 123)
}
})
b.Run("50%_sample_rate", func(b *testing.B) {
SetQueryLogSampleRate(0.5)
defer SetQueryLogSampleRate(0.0)
for i := 0; i < b.N; i++ {
ShouldEmitLog("select * from test where user='someone'", 0, 123)
}
})
}

0 comments on commit 19844b2

Please sign in to comment.