Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 2 new metrics with tablet type labels #13521

Merged
merged 2 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions go/vt/vttablet/tabletserver/query_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,9 @@ import (
"sync/atomic"
"time"

"vitess.io/vitess/go/mysql/sqlerror"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/vterrors"

"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/cache"
"vitess.io/vitess/go/mysql/sqlerror"
"vitess.io/vitess/go/pools"
"vitess.io/vitess/go/stats"
"vitess.io/vitess/go/streamlog"
Expand All @@ -41,9 +38,12 @@ import (
"vitess.io/vitess/go/vt/dbconnpool"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logutil"
"vitess.io/vitess/go/vt/proto/topodata"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/tableacl"
tacl "vitess.io/vitess/go/vt/tableacl/acl"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
"vitess.io/vitess/go/vt/vttablet/tabletserver/planbuilder"
"vitess.io/vitess/go/vt/vttablet/tabletserver/rules"
Expand Down Expand Up @@ -172,7 +172,7 @@ type QueryEngine struct {

// stats
// Note: queryErrorCountsWithCode is similar to queryErrorCounts except it contains error code as an additional dimension
queryCounts, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels

// stats flags
enablePerWorkloadTableMetrics bool
Expand Down Expand Up @@ -258,6 +258,7 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine {
}

qe.queryCounts = env.Exporter().NewCountersWithMultiLabels("QueryCounts", "query counts", labels)
qe.queryCountsWithTabletType = env.Exporter().NewCountersWithMultiLabels("QueryCountsWithTabletType", "query counts with tablet type labels", []string{"Table", "Plan", "TabletType"})
qe.queryTimes = env.Exporter().NewCountersWithMultiLabels("QueryTimesNs", "query times in ns", labels)
qe.queryRowsAffected = env.Exporter().NewCountersWithMultiLabels("QueryRowsAffected", "query rows affected", labels)
qe.queryRowsReturned = env.Exporter().NewCountersWithMultiLabels("QueryRowsReturned", "query rows returned", labels)
Expand Down Expand Up @@ -490,7 +491,7 @@ func (qe *QueryEngine) QueryPlanCacheLen() int {
}

// AddStats adds the given stats for the planName.tableName
func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
// table names can contain "." characters, replace them!
keys := []string{tableName, planType.String()}
// Only use the workload as a label if that's enabled in the configuration.
Expand All @@ -500,6 +501,9 @@ func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, worklo
qe.queryCounts.Add(keys, queryCount)
qe.queryTimes.Add(keys, int64(duration))
qe.queryErrorCounts.Add(keys, errorCount)

qe.queryCountsWithTabletType.Add([]string{tableName, planType.String(), tabletType.String()}, queryCount)

// queryErrorCountsWithCode is similar to queryErrorCounts except we have an additional dimension
// of error code.
if errorCount > 0 {
Expand Down
43 changes: 42 additions & 1 deletion go/vt/vttablet/tabletserver/query_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
"testing"
"time"

"vitess.io/vitess/go/vt/proto/topodata"

"vitess.io/vitess/go/vt/sqlparser"

"vitess.io/vitess/go/mysql"
Expand Down Expand Up @@ -577,6 +579,7 @@ func TestAddQueryStats(t *testing.T) {
name string
planType planbuilder.PlanType
tableName string
tabletType topodata.TabletType
queryCount int64
duration time.Duration
mysqlTime time.Duration
Expand All @@ -587,6 +590,7 @@ func TestAddQueryStats(t *testing.T) {
enablePerWorkloadTableMetrics bool
workload string
expectedQueryCounts string
expectedQueryCountsWithTableType string
expectedQueryTimes string
expectedQueryRowsAffected string
expectedQueryRowsReturned string
Expand All @@ -597,6 +601,27 @@ func TestAddQueryStats(t *testing.T) {
name: "select query",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 0,
rowsReturned: 15,
errorCount: 0,
errorCode: "OK",
enablePerWorkloadTableMetrics: false,
workload: "some-workload",
expectedQueryCounts: `{"A.Select": 1}`,
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select query against a replica",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_REPLICA,
queryCount: 1,
duration: 10,
rowsAffected: 0,
Expand All @@ -611,10 +636,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.REPLICA": 1}`,
}, {
name: "select into query",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 15,
Expand All @@ -629,10 +656,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 0,
Expand All @@ -647,10 +676,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryErrorCounts: `{"A.Select": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query",
planType: planbuilder.PlanInsert,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 15,
Expand All @@ -665,10 +696,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{}`,
expectedQueryErrorCounts: `{"A.Insert": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
}, {
name: "select query with per workload metrics",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 0,
Expand All @@ -683,10 +716,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select.some-workload": 15}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select into query with per workload metrics",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 15,
Expand All @@ -701,10 +736,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error with per workload metrics",
planType: planbuilder.PlanSelect,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 0,
Expand All @@ -719,10 +756,12 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query with per workload metrics",
planType: planbuilder.PlanInsert,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
duration: 10,
rowsAffected: 15,
Expand All @@ -737,6 +776,7 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryRowsReturned: `{}`,
expectedQueryErrorCounts: `{"A.Insert.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
},
}

Expand All @@ -749,8 +789,9 @@ func TestAddQueryStats(t *testing.T) {
env := tabletenv.NewEnv(config, "TestAddQueryStats_"+testcase.name)
se := schema.NewEngine(env)
qe := NewQueryEngine(env, se)
qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
assert.Equal(t, testcase.expectedQueryCounts, qe.queryCounts.String())
assert.Equal(t, testcase.expectedQueryCountsWithTableType, qe.queryCountsWithTabletType.String())
assert.Equal(t, testcase.expectedQueryTimes, qe.queryTimes.String())
assert.Equal(t, testcase.expectedQueryRowsAffected, qe.queryRowsAffected.String())
assert.Equal(t, testcase.expectedQueryRowsReturned, qe.queryRowsReturned.String())
Expand Down
8 changes: 6 additions & 2 deletions go/vt/vttablet/tabletserver/query_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) {
defer func(start time.Time) {
duration := time.Since(start)
qre.tsv.stats.QueryTimings.Add(planName, duration)
qre.tsv.stats.QueryTimingsByTabletType.Add(qre.tabletType.String(), duration)
qre.recordUserQuery("Execute", int64(duration))

mysqlTime := qre.logStats.MysqlResponseTime
Expand All @@ -135,13 +136,14 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) {
var errCode string
vtErrorCode := vterrors.Code(err)
errCode = vtErrorCode.String()

if reply == nil {
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.tabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.plan.AddStats(1, duration, mysqlTime, 0, 0, 1)
return
}

qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.tabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.plan.AddStats(1, duration, mysqlTime, reply.RowsAffected, uint64(len(reply.Rows)), 0)
qre.logStats.RowsAffected = int(reply.RowsAffected)
qre.logStats.Rows = reply.Rows
Expand Down Expand Up @@ -313,6 +315,7 @@ func (qre *QueryExecutor) Stream(callback StreamCallback) error {

defer func(start time.Time) {
qre.tsv.stats.QueryTimings.Record(qre.plan.PlanID.String(), start)
qre.tsv.stats.QueryTimingsByTabletType.Record(qre.tabletType.String(), start)
qre.recordUserQuery("Stream", int64(time.Since(start)))
}(time.Now())

Expand Down Expand Up @@ -402,6 +405,7 @@ func (qre *QueryExecutor) MessageStream(callback StreamCallback) error {

defer func(start time.Time) {
qre.tsv.stats.QueryTimings.Record(qre.plan.PlanID.String(), start)
qre.tsv.stats.QueryTimingsByTabletType.Record(qre.tabletType.String(), start)
qre.recordUserQuery("MessageStream", int64(time.Since(start)))
}(time.Now())

Expand Down
4 changes: 4 additions & 0 deletions go/vt/vttablet/tabletserver/tabletenv/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ type Stats struct {
UserActiveReservedCount *stats.CountersWithSingleLabel // Per CallerID active reserved connection counts
UserReservedCount *stats.CountersWithSingleLabel // Per CallerID reserved connection counts
UserReservedTimesNs *stats.CountersWithSingleLabel // Per CallerID reserved connection duration

QueryTimingsByTabletType *servenv.TimingsWrapper // Query timings split by current tablet type
}

// NewStats instantiates a new set of stats scoped by exporter.
Expand Down Expand Up @@ -94,6 +96,8 @@ func NewStats(exporter *servenv.Exporter) *Stats {
UserActiveReservedCount: exporter.NewCountersWithSingleLabel("UserActiveReservedCount", "active reserved connection for each CallerID", "CallerID"),
UserReservedCount: exporter.NewCountersWithSingleLabel("UserReservedCount", "reserved connection received for each CallerID", "CallerID"),
UserReservedTimesNs: exporter.NewCountersWithSingleLabel("UserReservedTimesNs", "Total reserved connection latency for each CallerID", "CallerID"),

QueryTimingsByTabletType: exporter.NewTimings("QueryTimingsByTabletType", "Query timings broken down by active tablet type", "TabletType"),
}
stats.QPSRates = exporter.NewRates("QPS", stats.QueryTimings, 15*60/5, 5*time.Second)
return stats
Expand Down
8 changes: 8 additions & 0 deletions go/vt/vttablet/tabletserver/tabletserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ func (tsv *TabletServer) begin(ctx context.Context, target *querypb.Target, save
logStats.OriginalSQL = beginSQL
if beginSQL != "" {
tsv.stats.QueryTimings.Record("BEGIN", startTime)
tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), startTime)
} else {
logStats.Method = ""
}
Expand Down Expand Up @@ -573,6 +574,7 @@ func (tsv *TabletServer) Commit(ctx context.Context, target *querypb.Target, tra
// handlePanicAndSendLogStats doesn't log the no-op.
if commitSQL != "" {
tsv.stats.QueryTimings.Record("COMMIT", startTime)
tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), startTime)
} else {
logStats.Method = ""
}
Expand All @@ -590,6 +592,7 @@ func (tsv *TabletServer) Rollback(ctx context.Context, target *querypb.Target, t
target, nil, true, /* allowOnShutdown */
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("ROLLBACK", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
logStats.TransactionID = transactionID
newReservedID, err = tsv.te.Rollback(ctx, transactionID)
if newReservedID > 0 {
Expand Down Expand Up @@ -1198,6 +1201,7 @@ func (tsv *TabletServer) ReserveBeginExecute(ctx context.Context, target *queryp
target, options, false, /* allowOnShutdown */
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("RESERVE", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
connID, sessionStateChanges, err = tsv.te.ReserveBegin(ctx, options, preQueries, postBeginQueries)
if err != nil {
return err
Expand Down Expand Up @@ -1243,6 +1247,7 @@ func (tsv *TabletServer) ReserveBeginStreamExecute(
target, options, false, /* allowOnShutdown */
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("RESERVE", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
connID, sessionStateChanges, err = tsv.te.ReserveBegin(ctx, options, preQueries, postBeginQueries)
if err != nil {
return err
Expand Down Expand Up @@ -1296,6 +1301,7 @@ func (tsv *TabletServer) ReserveExecute(ctx context.Context, target *querypb.Tar
target, options, allowOnShutdown,
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("RESERVE", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
state.ReservedID, err = tsv.te.Reserve(ctx, options, transactionID, preQueries)
if err != nil {
return err
Expand Down Expand Up @@ -1346,6 +1352,7 @@ func (tsv *TabletServer) ReserveStreamExecute(
target, options, allowOnShutdown,
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("RESERVE", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
state.ReservedID, err = tsv.te.Reserve(ctx, options, transactionID, preQueries)
if err != nil {
return err
Expand Down Expand Up @@ -1375,6 +1382,7 @@ func (tsv *TabletServer) Release(ctx context.Context, target *querypb.Target, tr
target, nil, true, /* allowOnShutdown */
func(ctx context.Context, logStats *tabletenv.LogStats) error {
defer tsv.stats.QueryTimings.Record("RELEASE", time.Now())
defer tsv.stats.QueryTimingsByTabletType.Record(target.TabletType.String(), time.Now())
logStats.TransactionID = transactionID
logStats.ReservedID = reservedID
if reservedID != 0 {
Expand Down