From caf2ede1a3d06adf8349cb2bd7dbac6875db95f9 Mon Sep 17 00:00:00 2001 From: Marylia Gutierrez Date: Fri, 18 Feb 2022 12:05:43 -0500 Subject: [PATCH] server: new statement details endpoint New endpoint for Statement Details. The endpoint returns: - Statement: the total stats for that statement fingerprint - StatementsPerAggregatedTs: returns the statement from the period selected but each aggregated timestamp is an entry. This list can be used on the overview page on the Statement Details for the creation of charts by aggregation timestamp. - StatementsPerPlanHash: returns the statement from the period selected but each plan hash is an entry. This list can be used on the Explain Plan tab of the Statement Details, to show the execution stats per Plan. Partially addresses #72129 Release note (api change): New Statement Details endpoint that returns the details for a selected statement, and its execution separated by aggregation timestamp (statementsPerAggregatedTs) and by plan hash (statementsPerPlanHash). --- docs/generated/http/full.md | 98 ++++++ pkg/server/combined_statement_stats.go | 461 +++++++++++++++++++++++-- pkg/server/serverpb/status.go | 5 +- pkg/server/serverpb/status.proto | 59 +++- pkg/server/tenant_status.go | 17 + 5 files changed, 611 insertions(+), 29 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index d9a7f6aa9332..f2db7f9c8b40 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3723,6 +3723,104 @@ Support status: [reserved](#support-status) +## StatementDetails + +`GET /_status/stmtdetails/{fingerprint_id}` + + + +Support status: [reserved](#support-status) + +#### Request Parameters + + + + +StatementDetailsRequest requests the details of a Statement, based on its keys. + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| fingerprint_id | [string](#cockroach.server.serverpb.StatementDetailsRequest-string) | | fingerprint_id is generated by ConstructStatementFingerprintID using: query, failed, implicitTxn and database. So we don't need to add them to the request. | [reserved](#support-status) | +| app_names | [string](#cockroach.server.serverpb.StatementDetailsRequest-string) | repeated | | [reserved](#support-status) | +| start | [int64](#cockroach.server.serverpb.StatementDetailsRequest-int64) | | Unix time range for aggregated statements. | [reserved](#support-status) | +| end | [int64](#cockroach.server.serverpb.StatementDetailsRequest-int64) | | | [reserved](#support-status) | + + + + + + + +#### Response Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| statement | [StatementDetailsResponse.CollectedStatementSummary](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.server.serverpb.StatementDetailsResponse.CollectedStatementSummary) | | statement returns the total statistics for the statement. | [reserved](#support-status) | +| statements_per_aggregated_ts | [StatementDetailsResponse.CollectedStatementGroupedByAggregatedTs](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.server.serverpb.StatementDetailsResponse.CollectedStatementGroupedByAggregatedTs) | repeated | statements_per_aggregated_ts returns the same statement from above, but with its statistics separated by the aggregated timestamp. | [reserved](#support-status) | +| statements_per_plan_hash | [StatementDetailsResponse.CollectedStatementGroupedByPlanHash](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.server.serverpb.StatementDetailsResponse.CollectedStatementGroupedByPlanHash) | repeated | statements_per_plan_hash returns the same statement from above, but with its statistics separated by the plan hash. | [reserved](#support-status) | +| internal_app_name_prefix | [string](#cockroach.server.serverpb.StatementDetailsResponse-string) | | If set and non-empty, indicates the prefix to application_name used for statements/queries issued internally by CockroachDB. | [reserved](#support-status) | + + + + + + + +#### StatementDetailsResponse.CollectedStatementSummary + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key_data | [cockroach.sql.StatementStatisticsKey](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.sql.StatementStatisticsKey) | | | [reserved](#support-status) | +| formatted_query | [string](#cockroach.server.serverpb.StatementDetailsResponse-string) | | Formatted query is the return of the key_data.query after prettify_statement. The value from the key_data cannot be replaced by the formatted value, because is used as is for diagnostic bundle. | [reserved](#support-status) | +| app_names | [string](#cockroach.server.serverpb.StatementDetailsResponse-string) | repeated | | [reserved](#support-status) | +| stats | [cockroach.sql.StatementStatistics](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.sql.StatementStatistics) | | | [reserved](#support-status) | +| aggregation_interval | [google.protobuf.Duration](#cockroach.server.serverpb.StatementDetailsResponse-google.protobuf.Duration) | | | [reserved](#support-status) | + + + + + + +#### StatementDetailsResponse.CollectedStatementGroupedByAggregatedTs + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| stats | [cockroach.sql.StatementStatistics](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.sql.StatementStatistics) | | | [reserved](#support-status) | +| aggregation_interval | [google.protobuf.Duration](#cockroach.server.serverpb.StatementDetailsResponse-google.protobuf.Duration) | | | [reserved](#support-status) | +| aggregated_ts | [google.protobuf.Timestamp](#cockroach.server.serverpb.StatementDetailsResponse-google.protobuf.Timestamp) | | | [reserved](#support-status) | + + + + + + +#### StatementDetailsResponse.CollectedStatementGroupedByPlanHash + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| stats | [cockroach.sql.StatementStatistics](#cockroach.server.serverpb.StatementDetailsResponse-cockroach.sql.StatementStatistics) | | | [reserved](#support-status) | +| aggregation_interval | [google.protobuf.Duration](#cockroach.server.serverpb.StatementDetailsResponse-google.protobuf.Duration) | | | [reserved](#support-status) | +| explain_plan | [string](#cockroach.server.serverpb.StatementDetailsResponse-string) | | | [reserved](#support-status) | +| plan_hash | [uint64](#cockroach.server.serverpb.StatementDetailsResponse-uint64) | | | [reserved](#support-status) | + + + + + + ## CreateStatementDiagnosticsReport `POST /_status/stmtdiagreports` diff --git a/pkg/server/combined_statement_stats.go b/pkg/server/combined_statement_stats.go index 277c1f4a329d..9e921601183b 100644 --- a/pkg/server/combined_statement_stats.go +++ b/pkg/server/combined_statement_stats.go @@ -13,6 +13,7 @@ package server import ( "context" "fmt" + "strconv" "strings" "time" @@ -26,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/sqlstats" "github.com/cockroachdb/cockroach/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" ) @@ -68,15 +70,15 @@ func getCombinedStatementStats( startTime := getTimeFromSeconds(req.Start) endTime := getTimeFromSeconds(req.End) limit := SQLStatsResponseMax.Get(&settings.SV) - whereClause, orderAndLimit, args := getQueryClausesAndArgs(startTime, endTime, limit, testingKnobs) + whereClause, orderAndLimit, args := getCombinedStatementsQueryClausesAndArgs(startTime, endTime, limit, testingKnobs) statements, err := collectCombinedStatements(ctx, ie, whereClause, args, orderAndLimit) if err != nil { - return nil, err + return nil, serverError(ctx, err) } transactions, err := collectCombinedTransactions(ctx, ie, whereClause, args, orderAndLimit) if err != nil { - return nil, err + return nil, serverError(ctx, err) } response := &serverpb.StatementsResponse{ @@ -89,11 +91,13 @@ func getCombinedStatementStats( return response, nil } -// getQueryClausesAndArgs returns: +// getCombinedStatementsQueryClausesAndArgs returns: // - where clause (filtering by name and aggregates_ts when defined) // - order and limit clause // - args that will replace the clauses above -func getQueryClausesAndArgs( +// The whereClause will be in the format `WHERE A = $1 AND B = $2` and +// args will return the list of arguments in order that will replace the actual values. +func getCombinedStatementsQueryClausesAndArgs( start, end *time.Time, limit int64, testingKnobs *sqlstats.TestingKnobs, ) (whereClause string, orderAndLimitClause string, args []interface{}) { var buffer strings.Builder @@ -108,12 +112,11 @@ func getQueryClausesAndArgs( } if end != nil { - buffer.WriteString(fmt.Sprintf(" AND aggregated_ts <= $%d", len(args)+1)) args = append(args, *end) + buffer.WriteString(fmt.Sprintf(" AND aggregated_ts <= $%d", len(args))) } - - orderAndLimitClause = fmt.Sprintf(` ORDER BY aggregated_ts DESC LIMIT $%d`, len(args)+1) args = append(args, limit) + orderAndLimitClause = fmt.Sprintf(` ORDER BY aggregated_ts DESC LIMIT $%d`, len(args)) return buffer.String(), orderAndLimitClause, args } @@ -122,7 +125,7 @@ func collectCombinedStatements( ctx context.Context, ie *sql.InternalExecutor, whereClause string, - qargs []interface{}, + args []interface{}, orderAndLimit string, ) ([]serverpb.StatementsResponse_CollectedStatementStatistics, error) { @@ -150,10 +153,10 @@ func collectCombinedStatements( it, err := ie.QueryIteratorEx(ctx, "combined-stmts-by-interval", nil, sessiondata.InternalExecutorOverride{ User: security.NodeUserName(), - }, query, qargs...) + }, query, args...) if err != nil { - return nil, err + return nil, serverError(ctx, err) } defer func() { @@ -172,17 +175,17 @@ func collectCombinedStatements( } if row.Len() != expectedNumDatums { - return nil, errors.Newf("expected %d columns, receieved %d", expectedNumDatums) + return nil, errors.Newf("expected %d columns, received %d", expectedNumDatums) } var statementFingerprintID uint64 if statementFingerprintID, err = sqlstatsutil.DatumToUint64(row[0]); err != nil { - return nil, err + return nil, serverError(ctx, err) } var transactionFingerprintID uint64 if transactionFingerprintID, err = sqlstatsutil.DatumToUint64(row[1]); err != nil { - return nil, err + return nil, serverError(ctx, err) } app := string(tree.MustBeDString(row[2])) @@ -191,7 +194,7 @@ func collectCombinedStatements( var metadata roachpb.CollectedStatementStatistics metadataJSON := tree.MustBeDJSON(row[4]).JSON if err = sqlstatsutil.DecodeStmtStatsMetadataJSON(metadataJSON, &metadata); err != nil { - return nil, err + return nil, serverError(ctx, err) } metadata.Key.App = app @@ -200,13 +203,13 @@ func collectCombinedStatements( statsJSON := tree.MustBeDJSON(row[5]).JSON if err = sqlstatsutil.DecodeStmtStatsStatisticsJSON(statsJSON, &metadata.Stats); err != nil { - return nil, err + return nil, serverError(ctx, err) } planJSON := tree.MustBeDJSON(row[6]).JSON plan, err := sqlstatsutil.JSONToExplainTreePlanNode(planJSON) if err != nil { - return nil, err + return nil, serverError(ctx, err) } metadata.Stats.SensitiveInfo.MostRecentPlanDescription = *plan @@ -227,7 +230,7 @@ func collectCombinedStatements( } if err != nil { - return nil, err + return nil, serverError(ctx, err) } return statements, nil @@ -237,7 +240,7 @@ func collectCombinedTransactions( ctx context.Context, ie *sql.InternalExecutor, whereClause string, - qargs []interface{}, + args []interface{}, orderAndLimit string, ) ([]serverpb.StatementsResponse_ExtendedCollectedTransactionStatistics, error) { @@ -262,10 +265,10 @@ func collectCombinedTransactions( it, err := ie.QueryIteratorEx(ctx, "combined-txns-by-interval", nil, sessiondata.InternalExecutorOverride{ User: security.NodeUserName(), - }, query, qargs...) + }, query, args...) if err != nil { - return nil, err + return nil, serverError(ctx, err) } defer func() { @@ -284,25 +287,25 @@ func collectCombinedTransactions( } if row.Len() != expectedNumDatums { - return nil, errors.Newf("expected %d columns, receieved %d", expectedNumDatums, row.Len()) + return nil, errors.Newf("expected %d columns, received %d", expectedNumDatums, row.Len()) } app := string(tree.MustBeDString(row[0])) aggregatedTs := tree.MustBeDTimestampTZ(row[1]).Time fingerprintID, err := sqlstatsutil.DatumToUint64(row[2]) if err != nil { - return nil, err + return nil, serverError(ctx, err) } var metadata roachpb.CollectedTransactionStatistics metadataJSON := tree.MustBeDJSON(row[3]).JSON if err = sqlstatsutil.DecodeTxnStatsMetadataJSON(metadataJSON, &metadata); err != nil { - return nil, err + return nil, serverError(ctx, err) } statsJSON := tree.MustBeDJSON(row[4]).JSON if err = sqlstatsutil.DecodeTxnStatsStatisticsJSON(statsJSON, &metadata.Stats); err != nil { - return nil, err + return nil, serverError(ctx, err) } aggInterval := tree.MustBeDInterval(row[5]).Duration @@ -322,8 +325,414 @@ func collectCombinedTransactions( } if err != nil { - return nil, err + return nil, serverError(ctx, err) } return transactions, nil } + +func (s *statusServer) StatementDetails( + ctx context.Context, req *serverpb.StatementDetailsRequest, +) (*serverpb.StatementDetailsResponse, error) { + ctx = propagateGatewayMetadata(ctx) + ctx = s.AnnotateCtx(ctx) + + if err := s.privilegeChecker.requireViewActivityOrViewActivityRedactedPermission(ctx); err != nil { + return nil, err + } + + return getStatementDetails( + ctx, + req, + s.internalExecutor, + s.st, + s.sqlServer.execCfg.SQLStatsTestingKnobs) +} + +func getStatementDetails( + ctx context.Context, + req *serverpb.StatementDetailsRequest, + ie *sql.InternalExecutor, + settings *cluster.Settings, + testingKnobs *sqlstats.TestingKnobs, +) (*serverpb.StatementDetailsResponse, error) { + limit := SQLStatsResponseMax.Get(&settings.SV) + whereClause, args, err := getStatementDetailsQueryClausesAndArgs(req, testingKnobs) + if err != nil { + return nil, serverError(ctx, err) + } + + statementTotal, err := getTotalStatementDetails(ctx, ie, whereClause, args) + if err != nil { + return nil, serverError(ctx, err) + } + statementsPerAggregatedTs, err := getStatementDetailsPerAggregatedTs(ctx, ie, whereClause, args, limit) + if err != nil { + return nil, serverError(ctx, err) + } + statementsPerPlanHash, err := getStatementDetailsPerPlanHash(ctx, ie, whereClause, args, limit) + if err != nil { + return nil, serverError(ctx, err) + } + + response := &serverpb.StatementDetailsResponse{ + Statement: statementTotal, + StatementsPerAggregatedTs: statementsPerAggregatedTs, + StatementsPerPlanHash: statementsPerPlanHash, + InternalAppNamePrefix: catconstants.InternalAppNamePrefix, + } + + return response, nil +} + +// getStatementDetailsQueryClausesAndArgs returns whereClause and its arguments. +// The whereClause will be in the format `WHERE A = $1 AND B = $2` and +// args will return the list of arguments in order that will replace the actual values. +func getStatementDetailsQueryClausesAndArgs( + req *serverpb.StatementDetailsRequest, testingKnobs *sqlstats.TestingKnobs, +) (whereClause string, args []interface{}, err error) { + var buffer strings.Builder + buffer.WriteString(testingKnobs.GetAOSTClause()) + + fingerprintID, err := strconv.ParseUint(req.FingerprintId, 10, 64) + if err != nil { + return "", nil, err + } + args = append(args, strconv.FormatUint(fingerprintID, 16)) + buffer.WriteString(fmt.Sprintf(" WHERE encode(fingerprint_id, 'hex') = $%d", len(args))) + + // Filter out internal statements by app name. + buffer.WriteString(fmt.Sprintf(" AND app_name NOT LIKE '%s%%'", catconstants.InternalAppNamePrefix)) + + // Statements are grouped ignoring the app name in the Statements/Transactions page, so when + // calling for the Statement Details endpoint, this value can be empty or a list of app names. + if len(req.AppNames) > 0 { + if !(len(req.AppNames) == 1 && req.AppNames[0] == "") { + buffer.WriteString(" AND (") + for i, app := range req.AppNames { + if i != 0 { + args = append(args, app) + buffer.WriteString(fmt.Sprintf(" OR app_name = $%d", len(args))) + } else { + args = append(args, app) + buffer.WriteString(fmt.Sprintf(" app_name = $%d", len(args))) + } + } + buffer.WriteString(" )") + } + } + + start := getTimeFromSeconds(req.Start) + if start != nil { + args = append(args, *start) + buffer.WriteString(fmt.Sprintf(" AND aggregated_ts >= $%d", len(args))) + } + end := getTimeFromSeconds(req.End) + if end != nil { + args = append(args, *end) + buffer.WriteString(fmt.Sprintf(" AND aggregated_ts <= $%d", len(args))) + } + whereClause = buffer.String() + + return whereClause, args, nil +} + +// getTotalStatementDetails return all the statistics for the selectec statement combined. +func getTotalStatementDetails( + ctx context.Context, ie *sql.InternalExecutor, whereClause string, args []interface{}, +) (serverpb.StatementDetailsResponse_CollectedStatementSummary, error) { + query := fmt.Sprintf( + `SELECT + metadata, + aggregation_interval, + prettify_statement(metadata ->> 'query', %d, %d, %d) as query, + array_agg(app_name) as app_names, + crdb_internal.merge_statement_stats(array_agg(statistics)) AS statistics, + max(sampled_plan) as sampled_plan + FROM crdb_internal.statement_statistics %s + GROUP BY + metadata, + aggregation_interval + LIMIT 1`, tree.ConsoleLineWidth, tree.PrettyAlignAndDeindent, tree.UpperCase, whereClause) + + const expectedNumDatums = 6 + var statement serverpb.StatementDetailsResponse_CollectedStatementSummary + + row, err := ie.QueryRowEx(ctx, "combined-stmts-details-total", nil, + sessiondata.InternalExecutorOverride{ + User: security.NodeUserName(), + }, query, args...) + + if err != nil { + return statement, serverError(ctx, err) + } + if len(row) == 0 { + return statement, serverError(ctx, errors.New("statement not found")) + } + if row.Len() != expectedNumDatums { + return statement, serverError(ctx, errors.Newf("expected %d columns, received %d", expectedNumDatums)) + } + + var statistics roachpb.CollectedStatementStatistics + metadataJSON := tree.MustBeDJSON(row[0]).JSON + if err = sqlstatsutil.DecodeStmtStatsMetadataJSON(metadataJSON, &statistics); err != nil { + return statement, serverError(ctx, err) + } + + aggInterval := tree.MustBeDInterval(row[1]).Duration + queryPrettify := string(tree.MustBeDString(row[2])) + + apps := tree.MustBeDArray(row[3]) + var appNames []string + for _, s := range apps.Array { + appNames = util.CombineUniqueString(appNames, []string{string(tree.MustBeDString(s))}) + } + + statsJSON := tree.MustBeDJSON(row[4]).JSON + if err = sqlstatsutil.DecodeStmtStatsStatisticsJSON(statsJSON, &statistics.Stats); err != nil { + return statement, serverError(ctx, err) + } + + planJSON := tree.MustBeDJSON(row[5]).JSON + plan, err := sqlstatsutil.JSONToExplainTreePlanNode(planJSON) + if err != nil { + return statement, serverError(ctx, err) + } + statistics.Stats.SensitiveInfo.MostRecentPlanDescription = *plan + + statement = serverpb.StatementDetailsResponse_CollectedStatementSummary{ + KeyData: statistics.Key, + FormattedQuery: queryPrettify, + AppNames: appNames, + AggregationInterval: time.Duration(aggInterval.Nanos()), + Stats: statistics.Stats, + } + + return statement, nil +} + +// getStatementDetailsPerAggregatedTs returns the list of statements +// per aggregated timestamp, not using the columns plan hash as +// part of the key on the grouping. +func getStatementDetailsPerAggregatedTs( + ctx context.Context, + ie *sql.InternalExecutor, + whereClause string, + args []interface{}, + limit int64, +) ([]serverpb.StatementDetailsResponse_CollectedStatementGroupedByAggregatedTs, error) { + query := fmt.Sprintf( + `SELECT + aggregated_ts, + metadata, + crdb_internal.merge_statement_stats(array_agg(statistics)) AS statistics, + max(sampled_plan) as sampled_plan, + aggregation_interval + FROM crdb_internal.statement_statistics %s + GROUP BY + aggregated_ts, + metadata, + aggregation_interval + LIMIT $%d`, whereClause, len(args)+1) + + args = append(args, limit) + const expectedNumDatums = 5 + + it, err := ie.QueryIteratorEx(ctx, "combined-stmts-details-by-aggregated-timestamp", nil, + sessiondata.InternalExecutorOverride{ + User: security.NodeUserName(), + }, query, args...) + + if err != nil { + return nil, serverError(ctx, err) + } + + defer func() { + closeErr := it.Close() + if closeErr != nil { + err = errors.CombineErrors(err, closeErr) + } + }() + + var statements []serverpb.StatementDetailsResponse_CollectedStatementGroupedByAggregatedTs + var ok bool + for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { + var row tree.Datums + if row = it.Cur(); row == nil { + return nil, errors.New("unexpected null row") + } + + if row.Len() != expectedNumDatums { + return nil, errors.Newf("expected %d columns, received %d", expectedNumDatums) + } + + aggregatedTs := tree.MustBeDTimestampTZ(row[0]).Time + + var metadata roachpb.CollectedStatementStatistics + metadataJSON := tree.MustBeDJSON(row[1]).JSON + if err = sqlstatsutil.DecodeStmtStatsMetadataJSON(metadataJSON, &metadata); err != nil { + return nil, serverError(ctx, err) + } + + statsJSON := tree.MustBeDJSON(row[2]).JSON + if err = sqlstatsutil.DecodeStmtStatsStatisticsJSON(statsJSON, &metadata.Stats); err != nil { + return nil, serverError(ctx, err) + } + + planJSON := tree.MustBeDJSON(row[3]).JSON + plan, err := sqlstatsutil.JSONToExplainTreePlanNode(planJSON) + if err != nil { + return nil, serverError(ctx, err) + } + metadata.Stats.SensitiveInfo.MostRecentPlanDescription = *plan + + aggInterval := tree.MustBeDInterval(row[4]).Duration + + stmt := serverpb.StatementDetailsResponse_CollectedStatementGroupedByAggregatedTs{ + AggregatedTs: aggregatedTs, + AggregationInterval: time.Duration(aggInterval.Nanos()), + Stats: metadata.Stats, + } + + statements = append(statements, stmt) + } + if err != nil { + return nil, serverError(ctx, err) + } + + return statements, nil +} + +// getExplainPlanFromGist decode the Explain Plan from a Plan Gist. +func getExplainPlanFromGist(ctx context.Context, ie *sql.InternalExecutor, planGist string) string { + planError := "Error collecting Explain Plan." + var args []interface{} + + query := `SELECT crdb_internal.decode_plan_gist($1)` + args = append(args, planGist) + + it, err := ie.QueryIteratorEx(ctx, "combined-stmts-details-get-explain-plan", nil, + sessiondata.InternalExecutorOverride{ + User: security.NodeUserName(), + }, query, args...) + + if err != nil { + return planError + } + + var explainPlan []string + var ok bool + for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { + var row tree.Datums + if row = it.Cur(); row == nil { + return planError + } + explainPlanLine := string(tree.MustBeDString(row[0])) + explainPlan = append(explainPlan, explainPlanLine) + } + if err != nil { + return planError + } + + return strings.Join(explainPlan, "\n") +} + +// getStatementDetailsPerPlanHash returns the list of statements +// per plan hash, not using the columns aggregated timestamp as +// part of the key on the grouping. +func getStatementDetailsPerPlanHash( + ctx context.Context, + ie *sql.InternalExecutor, + whereClause string, + args []interface{}, + limit int64, +) ([]serverpb.StatementDetailsResponse_CollectedStatementGroupedByPlanHash, error) { + query := fmt.Sprintf( + `SELECT + plan_hash, + (statistics -> 'statistics' -> 'planGists'->>0) as plan_gist, + metadata, + crdb_internal.merge_statement_stats(array_agg(statistics)) AS statistics, + max(sampled_plan) as sampled_plan, + aggregation_interval + FROM crdb_internal.statement_statistics %s + GROUP BY + plan_hash, + plan_gist, + metadata, + aggregation_interval + LIMIT $%d`, whereClause, len(args)+1) + + args = append(args, limit) + const expectedNumDatums = 6 + + it, err := ie.QueryIteratorEx(ctx, "combined-stmts-details-by-plan-hash", nil, + sessiondata.InternalExecutorOverride{ + User: security.NodeUserName(), + }, query, args...) + + if err != nil { + return nil, serverError(ctx, err) + } + + defer func() { + closeErr := it.Close() + if closeErr != nil { + err = errors.CombineErrors(err, closeErr) + } + }() + + var statements []serverpb.StatementDetailsResponse_CollectedStatementGroupedByPlanHash + var ok bool + for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { + var row tree.Datums + if row = it.Cur(); row == nil { + return nil, errors.New("unexpected null row") + } + + if row.Len() != expectedNumDatums { + return nil, errors.Newf("expected %d columns, received %d", expectedNumDatums) + } + + var planHash uint64 + if planHash, err = sqlstatsutil.DatumToUint64(row[0]); err != nil { + return nil, serverError(ctx, err) + } + planGist := string(tree.MustBeDString(row[1])) + explainPlan := getExplainPlanFromGist(ctx, ie, planGist) + + var metadata roachpb.CollectedStatementStatistics + metadataJSON := tree.MustBeDJSON(row[2]).JSON + if err = sqlstatsutil.DecodeStmtStatsMetadataJSON(metadataJSON, &metadata); err != nil { + return nil, serverError(ctx, err) + } + + statsJSON := tree.MustBeDJSON(row[3]).JSON + if err = sqlstatsutil.DecodeStmtStatsStatisticsJSON(statsJSON, &metadata.Stats); err != nil { + return nil, serverError(ctx, err) + } + + planJSON := tree.MustBeDJSON(row[4]).JSON + plan, err := sqlstatsutil.JSONToExplainTreePlanNode(planJSON) + if err != nil { + return nil, serverError(ctx, err) + } + metadata.Stats.SensitiveInfo.MostRecentPlanDescription = *plan + + aggInterval := tree.MustBeDInterval(row[5]).Duration + + stmt := serverpb.StatementDetailsResponse_CollectedStatementGroupedByPlanHash{ + AggregationInterval: time.Duration(aggInterval.Nanos()), + ExplainPlan: explainPlan, + PlanHash: planHash, + Stats: metadata.Stats, + } + + statements = append(statements, stmt) + } + if err != nil { + return nil, serverError(ctx, err) + } + + return statements, nil +} diff --git a/pkg/server/serverpb/status.go b/pkg/server/serverpb/status.go index dd19acd85a13..40b8da714974 100644 --- a/pkg/server/serverpb/status.go +++ b/pkg/server/serverpb/status.go @@ -29,13 +29,14 @@ type SQLStatusServer interface { ResetSQLStats(context.Context, *ResetSQLStatsRequest) (*ResetSQLStatsResponse, error) CombinedStatementStats(context.Context, *CombinedStatementsStatsRequest) (*StatementsResponse, error) Statements(context.Context, *StatementsRequest) (*StatementsResponse, error) + StatementDetails(context.Context, *StatementDetailsRequest) (*StatementDetailsResponse, error) ListDistSQLFlows(context.Context, *ListDistSQLFlowsRequest) (*ListDistSQLFlowsResponse, error) ListLocalDistSQLFlows(context.Context, *ListDistSQLFlowsRequest) (*ListDistSQLFlowsResponse, error) - Profile(ctx context.Context, request *ProfileRequest) (*JSONResponse, error) + Profile(context.Context, *ProfileRequest) (*JSONResponse, error) IndexUsageStatistics(context.Context, *IndexUsageStatisticsRequest) (*IndexUsageStatisticsResponse, error) ResetIndexUsageStats(context.Context, *ResetIndexUsageStatsRequest) (*ResetIndexUsageStatsResponse, error) TableIndexStats(context.Context, *TableIndexStatsRequest) (*TableIndexStatsResponse, error) - UserSQLRoles(ctx context.Context, request *UserSQLRolesRequest) (*UserSQLRolesResponse, error) + UserSQLRoles(context.Context, *UserSQLRolesRequest) (*UserSQLRolesResponse, error) TxnIDResolution(context.Context, *TxnIDResolutionRequest) (*TxnIDResolutionResponse, error) } diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 5caea44e067f..beb24f2dc051 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1318,6 +1318,59 @@ message CombinedStatementsStatsRequest { int64 end = 2 [(gogoproto.nullable) = true]; } +// StatementDetailsRequest requests the details of a Statement, based on its keys. +message StatementDetailsRequest { + // fingerprint_id is generated by ConstructStatementFingerprintID using: + // query, failed, implicitTxn and database. So we don't need to add them + // to the request. + string fingerprint_id = 1; + repeated string app_names = 2 [(gogoproto.nullable) = false]; + // Unix time range for aggregated statements. + int64 start = 3 [(gogoproto.nullable) = true]; + int64 end = 4 [(gogoproto.nullable) = true]; +} + +message StatementDetailsResponse { + message CollectedStatementSummary { + cockroach.sql.StatementStatisticsKey key_data = 1 [(gogoproto.nullable) = false]; + // Formatted query is the return of the key_data.query after prettify_statement. + // The value from the key_data cannot be replaced by the formatted value, because is used as is for + // diagnostic bundle. + string formatted_query = 2; + repeated string app_names = 3 [(gogoproto.nullable) = false]; + cockroach.sql.StatementStatistics stats = 4 [(gogoproto.nullable) = false]; + google.protobuf.Duration aggregation_interval = 5 [(gogoproto.nullable) = false, + (gogoproto.stdduration) = true]; + } + + message CollectedStatementGroupedByAggregatedTs { + cockroach.sql.StatementStatistics stats = 1 [(gogoproto.nullable) = false]; + google.protobuf.Duration aggregation_interval = 2 [(gogoproto.nullable) = false, + (gogoproto.stdduration) = true]; + google.protobuf.Timestamp aggregated_ts = 3 [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + } + + message CollectedStatementGroupedByPlanHash { + cockroach.sql.StatementStatistics stats = 1 [(gogoproto.nullable) = false]; + google.protobuf.Duration aggregation_interval = 2 [(gogoproto.nullable) = false, + (gogoproto.stdduration) = true]; + string explain_plan = 4; + uint64 plan_hash = 5; + } + + // statement returns the total statistics for the statement. + CollectedStatementSummary statement = 1 [(gogoproto.nullable) = false]; + // statements_per_aggregated_ts returns the same statement from above, but with its statistics + // separated by the aggregated timestamp. + repeated CollectedStatementGroupedByAggregatedTs statements_per_aggregated_ts = 2 [(gogoproto.nullable) = false]; + // statements_per_plan_hash returns the same statement from above, but with its statistics + // separated by the plan hash. + repeated CollectedStatementGroupedByPlanHash statements_per_plan_hash = 3 [(gogoproto.nullable) = false]; + // If set and non-empty, indicates the prefix to application_name + // used for statements/queries issued internally by CockroachDB. + string internal_app_name_prefix = 4; +} + message StatementDiagnosticsReport { int64 id = 1; bool completed = 2; @@ -1827,13 +1880,17 @@ service Status { get: "/_status/statements" }; } - // Retrieve the combined in-memory and persisted statement stats by date range. rpc CombinedStatementStats(CombinedStatementsStatsRequest) returns (StatementsResponse) { option (google.api.http) = { get: "/_status/combinedstmts" }; } + rpc StatementDetails(StatementDetailsRequest) returns (StatementDetailsResponse) { + option (google.api.http) = { + get: "/_status/stmtdetails/{fingerprint_id}" + }; + } rpc CreateStatementDiagnosticsReport(CreateStatementDiagnosticsReportRequest) returns (CreateStatementDiagnosticsReportResponse) { option (google.api.http) = { post: "/_status/stmtdiagreports" diff --git a/pkg/server/tenant_status.go b/pkg/server/tenant_status.go index 714782b050cd..6aa199ada383 100644 --- a/pkg/server/tenant_status.go +++ b/pkg/server/tenant_status.go @@ -548,6 +548,23 @@ func (t *tenantStatusServer) CombinedStatementStats( t.sqlServer.internalExecutor, t.st, t.sqlServer.execCfg.SQLStatsTestingKnobs) } +func (t *tenantStatusServer) StatementDetails( + ctx context.Context, req *serverpb.StatementDetailsRequest, +) (*serverpb.StatementDetailsResponse, error) { + ctx = propagateGatewayMetadata(ctx) + ctx = t.AnnotateCtx(ctx) + + if err := t.privilegeChecker.requireViewActivityOrViewActivityRedactedPermission(ctx); err != nil { + return nil, err + } + + if t.sqlServer.SQLInstanceID() == 0 { + return nil, status.Errorf(codes.Unavailable, "instanceID not set") + } + + return getStatementDetails(ctx, req, t.sqlServer.internalExecutor, t.st, t.sqlServer.execCfg.SQLStatsTestingKnobs) +} + // Statements implements the relevant endpoint on the StatusServer by // fanning out a request to all pods on the current tenant via gRPC to collect // in-memory statistics and append them together for the caller.