From 66aea1ca4b10dc761a981aeaa56d876c967736ea Mon Sep 17 00:00:00 2001 From: Kyle Wong <37189875+kyle-a-wong@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:52:15 -0500 Subject: [PATCH 1/3] ui: add metric assertions to overview page e2e test Adds assertions to the Overview page to make sure that data from nodes_ui is properly fetched and rendered on the page. Epic: None Release note: None --- .../cypress/e2e/health-check/authenticated.cy.ts | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/ui/workspaces/e2e-tests/cypress/e2e/health-check/authenticated.cy.ts b/pkg/ui/workspaces/e2e-tests/cypress/e2e/health-check/authenticated.cy.ts index 5f1525b0598a..f260e61381e0 100644 --- a/pkg/ui/workspaces/e2e-tests/cypress/e2e/health-check/authenticated.cy.ts +++ b/pkg/ui/workspaces/e2e-tests/cypress/e2e/health-check/authenticated.cy.ts @@ -22,7 +22,15 @@ describe("health check: authenticated user", () => { cy.findByText("Capacity Usage", { selector: "h3>span" }); cy.findByText("Node Status"); cy.findByText("Replication Status"); - + // Asserts that storage used from nodes_ui metrics is populated + cy.get(".cluster-summary__metric.storage-used").should( + isTextGreaterThanZero, + ); + // Asserts that storage usable from nodes_ui metrics is populated + cy.get(".cluster-summary__metric.storage-usable").should( + isTextGreaterThanZero, + ); + cy.get(".cluster-summary__metric.live-nodes").should(isTextGreaterThanZero); // Check for sidebar contents cy.findByRole("navigation").within(() => { cy.findByRole("link", { name: "Overview" }); @@ -35,3 +43,9 @@ describe("health check: authenticated user", () => { }); }); }); + +const isTextGreaterThanZero = (ele: JQuery) => { + const text = ele.get()[0].innerText; + const textAsFloat = parseFloat(text); + expect(textAsFloat).to.be.greaterThan(0); +}; From c47ef40b7d55800f1482d43c0b513bad3c8bfe00 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 5 Dec 2024 17:42:28 -0500 Subject: [PATCH 2/3] sql: reduce allocations in `NewPlanningCtxWithOracle` Release note: None --- pkg/sql/distsql_physical_planner.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index 4ddf751f8dd6..09c5be0e62be 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -918,6 +918,9 @@ type PlanningCtx struct { // local flow finished running and is being cleaned up. It allows us to // release the resources that are acquired during the physical planning and // are being held onto throughout the whole flow lifecycle. + // Note that these functions might not be called in some cases (when + // DistSQLPlanner.Run is not called), but that is ok since on the main query + // path it will get called. onFlowCleanup []func() // This is true if plan is a simple insert that can be vectorized. @@ -1072,7 +1075,13 @@ func flowSpecsToDiagram( // local flow finished running. This can be called only after the physical // planning has been completed. func (p *PlanningCtx) getCleanupFunc() func() { + if len(p.onFlowCleanup) == 0 { + // Make sure to release the physical infrastructure after the execution + // finishes. 
+ return p.infra.Release + } return func() { + p.infra.Release() for _, r := range p.onFlowCleanup { r() } @@ -5049,6 +5058,9 @@ func (dsp *DistSQLPlanner) NewPlanningCtxWithOracle( localityFiler roachpb.Locality, ) *PlanningCtx { distribute := distributionType == FullDistribution + // Note: infra.Release is not added to the planCtx's onFlowCleanup + // functions below. It is instead called directly in the function + // returned by getCleanupFunc. infra := physicalplan.NewPhysicalInfrastructure(uuid.MakeV4(), dsp.gatewaySQLInstanceID) planCtx := &PlanningCtx{ ExtendedEvalCtx: evalCtx, @@ -5056,11 +5068,6 @@ func (dsp *DistSQLPlanner) NewPlanningCtxWithOracle( infra: infra, isLocal: !distribute, planner: planner, - // Make sure to release the physical infrastructure after the execution - // finishes. Note that onFlowCleanup might not be called in some cases - // (when DistSQLPlanner.Run is not called), but that is ok since on the - // main query path it will get called. - onFlowCleanup: []func(){infra.Release}, } if !distribute { if planner == nil || From 2b421ea0cd6c9a6547e7a673fb233594444f9311 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Fri, 6 Dec 2024 13:03:52 -0500 Subject: [PATCH 3/3] sql/schemachanger: log schema change job latency in structured log Release note (ops change): When a schema change job is completed, rolls back, or encounteres a failure, the time taken since the job began is now logged in a structured log in the SQL_SCHEMA log channel. --- docs/generated/eventlog.md | 7 +++++ .../logictest/testdata/logic_test/event_log | 19 ++++++++----- .../testdata/logic_test/event_log_legacy | 19 ++++++++----- .../testdata/logic_test/new_schema_changer | 13 ++++++--- pkg/sql/schema_changer.go | 15 ++++++++--- .../scdeps/sctestdeps/test_deps.go | 3 +++ pkg/sql/schemachanger/scjob/job.go | 2 ++ pkg/sql/schemachanger/scrun/BUILD.bazel | 1 + pkg/sql/schemachanger/scrun/scrun.go | 19 +++++++++---- pkg/sql/schemachanger/sctest/BUILD.bazel | 1 + pkg/sql/schemachanger/sctest/end_to_end.go | 4 ++- pkg/util/log/eventpb/ddl_events.proto | 6 +++++ pkg/util/log/eventpb/json_encode_generated.go | 27 +++++++++++++++++++ 13 files changed, 108 insertions(+), 28 deletions(-) diff --git a/docs/generated/eventlog.md b/docs/generated/eventlog.md index 49764bb701be..b9087535a238 100644 --- a/docs/generated/eventlog.md +++ b/docs/generated/eventlog.md @@ -1686,6 +1686,9 @@ An event of type `finish_schema_change` is recorded when a previously initiated change has completed. +| Field | Description | Sensitive | +|--|--|--| +| `LatencyNanos` | The amount of time the schema change job took to complete. | no | #### Common fields @@ -1704,6 +1707,9 @@ An event of type `finish_schema_change_rollback` is recorded when a previously initiated schema change rollback has completed. +| Field | Description | Sensitive | +|--|--|--| +| `LatencyNanos` | The amount of time the schema change job took to rollback. | no | #### Common fields @@ -1868,6 +1874,7 @@ encounters a problem and is reversed. |--|--|--| | `Error` | The error encountered that caused the schema change to be reversed. The specific format of the error is variable and can change across releases without warning. | yes | | `SQLSTATE` | The SQLSTATE code for the error. | no | +| `LatencyNanos` | The amount of time the schema change job took before being reverted. 
| no | #### Common fields diff --git a/pkg/sql/logictest/testdata/logic_test/event_log b/pkg/sql/logictest/testdata/logic_test/event_log index 2257492df97c..fc7b69de1eec 100644 --- a/pkg/sql/logictest/testdata/logic_test/event_log +++ b/pkg/sql/logictest/testdata/logic_test/event_log @@ -117,11 +117,16 @@ WHERE "eventType" = 'alter_table' 1 {"EventType": "alter_table", "MutationID": 1, "Statement": "ALTER TABLE test.public.a ADD COLUMN val INT8", "TableName": "test.public.a", "Tag": "ALTER TABLE", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ---- 1 {"EventType": "finish_schema_change", "InstanceID": 1} +# Verify that LatencyNanos is populated. +statement ok +SELECT 1 / coalesce((info::JSONB->'LatencyNanos')::INT, 0) FROM system.eventlog +WHERE "eventType" = 'finish_schema_change' + query I SELECT "reportingID" FROM system.eventlog WHERE "eventType" = 'reverse_schema_change' @@ -156,20 +161,20 @@ ORDER BY "timestamp", info 1 {"EventType": "create_index", "IndexName": "foo", "MutationID": 1, "Statement": "ALTER TABLE test.public.a ADD CONSTRAINT foo UNIQUE (val)", "TableName": "test.public.a", "Tag": "ALTER TABLE", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ---- 1 {"EventType": "finish_schema_change", "InstanceID": 1} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'Error' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'Error' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'reverse_schema_change' ---- 1 {"EventType": "reverse_schema_change", "InstanceID": 1, "SQLSTATE": "23505"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change_rollback' ---- 1 {"EventType": "finish_schema_change_rollback", "InstanceID": 1} @@ -188,7 +193,7 @@ WHERE "eventType" = 'create_index' 1 {"EventType": "create_index", "IndexName": "a_foo", "MutationID": 1, "Statement": "CREATE INDEX a_foo ON test.public.a (val)", "TableName": "test.public.a", "Tag": "CREATE INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- @@ -206,7 +211,7 @@ WHERE "eventType" = 'create_index' 1 {"EventType": "create_index", "IndexName": "a_val_idx", "MutationID": 1, "Statement": "CREATE INDEX ON test.public.a (val)", "TableName": "test.public.a", "Tag": "CREATE INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- @@ -228,7 +233,7 @@ WHERE "eventType" = 'drop_index' 1 {"EventType": "drop_index", "IndexName": "a_foo", "MutationID": 1, "Statement": "DROP INDEX test.public.a@a_foo", "TableName": 
"test.public.a", "Tag": "DROP INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- diff --git a/pkg/sql/logictest/testdata/logic_test/event_log_legacy b/pkg/sql/logictest/testdata/logic_test/event_log_legacy index 80dbd9fd7a20..efb0bd0334a8 100644 --- a/pkg/sql/logictest/testdata/logic_test/event_log_legacy +++ b/pkg/sql/logictest/testdata/logic_test/event_log_legacy @@ -117,11 +117,16 @@ WHERE "eventType" = 'alter_table' 1 {"EventType": "alter_table", "MutationID": 1, "Statement": "ALTER TABLE test.public.a ADD COLUMN val INT8", "TableName": "test.public.a", "Tag": "ALTER TABLE", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ---- 1 {"EventType": "finish_schema_change", "InstanceID": 1, "MutationID": 1} +# Verify that LatencyNanos is populated. +statement ok +SELECT 1 / coalesce((info::JSONB->'LatencyNanos')::INT, 0) FROM system.eventlog +WHERE "eventType" = 'finish_schema_change' + query I SELECT "reportingID" FROM system.eventlog WHERE "eventType" = 'reverse_schema_change' @@ -156,13 +161,13 @@ ORDER BY "timestamp", info 1 {"EventType": "alter_table", "MutationID": 2, "Statement": "ALTER TABLE test.public.a ADD CONSTRAINT foo UNIQUE (val)", "TableName": "test.public.a", "Tag": "ALTER TABLE", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ---- 1 {"EventType": "finish_schema_change", "InstanceID": 1, "MutationID": 1} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'Error' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'Error' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'reverse_schema_change' ---- @@ -170,7 +175,7 @@ WHERE "eventType" = 'reverse_schema_change' query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change_rollback' ---- 1 {"EventType": "finish_schema_change_rollback", "InstanceID": 1, "MutationID": 2} @@ -189,7 +194,7 @@ WHERE "eventType" = 'create_index' 1 {"EventType": "create_index", "IndexName": "a_foo", "MutationID": 3, "Statement": "CREATE INDEX a_foo ON test.public.a (val)", "TableName": "test.public.a", "Tag": "CREATE INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- @@ -207,7 +212,7 @@ WHERE "eventType" = 'create_index' 1 {"EventType": "create_index", "IndexName": "a_val_idx", "MutationID": 4, "Statement": "CREATE INDEX ON test.public.a (val)", "TableName": "test.public.a", "Tag": "CREATE INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT 
"reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- @@ -229,7 +234,7 @@ WHERE "eventType" = 'drop_index' 1 {"EventType": "drop_index", "IndexName": "a_foo", "MutationID": 5, "Statement": "DROP INDEX test.public.a@a_foo", "TableName": "test.public.a", "Tag": "DROP INDEX", "User": "root"} query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' FROM system.eventlog +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog WHERE "eventType" = 'finish_schema_change' ORDER BY "timestamp", info ---- diff --git a/pkg/sql/logictest/testdata/logic_test/new_schema_changer b/pkg/sql/logictest/testdata/logic_test/new_schema_changer index 609b4b18664c..797c77c6b49f 100644 --- a/pkg/sql/logictest/testdata/logic_test/new_schema_changer +++ b/pkg/sql/logictest/testdata/logic_test/new_schema_changer @@ -1100,13 +1100,18 @@ statement ok DROP VIEW v1ev CASCADE; query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog ORDER BY "timestamp", info DESC ---- 1 {"CascadeDroppedViews": ["test.public.v4ev"], "EventType": "drop_view", "Statement": "DROP VIEW test.public.v1ev CASCADE", "Tag": "DROP VIEW", "User": "root", "ViewName": "test.public.v1ev"} 1 {"EventType": "finish_schema_change", "InstanceID": 1} +# Verify that LatencyNanos is populated. +statement ok +SELECT 1 / coalesce((info::JSONB->'LatencyNanos')::INT, 0) FROM system.eventlog +WHERE "eventType" = 'finish_schema_change' + statement ok CREATE VIEW v1ev AS (SELECT name FROM T1EV); @@ -1120,7 +1125,7 @@ statement ok DROP TABLE t1ev,t2ev CASCADE; query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog ORDER BY timestamp, info DESC; ---- @@ -1139,7 +1144,7 @@ statement ok ALTER TABLE fooev ADD COLUMN j INT query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog ORDER BY timestamp, info DESC; ---- @@ -1187,7 +1192,7 @@ statement ok DROP DATABASE db2 cascade; query IT -SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' +SELECT "reportingID", info::JSONB - 'Timestamp' - 'DescriptorID' - 'LatencyNanos' FROM system.eventlog ORDER BY timestamp, info DESC; ---- diff --git a/pkg/sql/schema_changer.go b/pkg/sql/schema_changer.go index e1d339552fe6..e3d40b05106e 100644 --- a/pkg/sql/schema_changer.go +++ b/pkg/sql/schema_changer.go @@ -1932,11 +1932,16 @@ func (sc *SchemaChanger) done(ctx context.Context) error { return err } + startTime := timeutil.FromUnixMicros(sc.job.Payload().StartedMicros) var info logpb.EventPayload if isRollback { - info = &eventpb.FinishSchemaChangeRollback{} + info = &eventpb.FinishSchemaChangeRollback{ + LatencyNanos: timeutil.Since(startTime).Nanoseconds(), + } } else { - info = &eventpb.FinishSchemaChange{} + info = &eventpb.FinishSchemaChange{ + LatencyNanos: timeutil.Since(startTime).Nanoseconds(), + } } // Log "Finish Schema Change" or "Finish Schema Change Rollback" @@ -2217,14 +2222,16 @@ func (sc *SchemaChanger) maybeReverseMutations(ctx context.Context, causingError // Log "Reverse Schema Change" event. 
Only the causing error and the // mutation ID are logged; this can be correlated with the DDL statement // that initiated the change using the mutation id. + startTime := timeutil.FromUnixMicros(sc.job.Payload().StartedMicros) return logEventInternalForSchemaChanges( ctx, sc.execCfg, txn, sc.sqlInstanceID, sc.descID, sc.mutationID, &eventpb.ReverseSchemaChange{ - Error: fmt.Sprintf("%+v", causingError), - SQLSTATE: pgerror.GetPGCode(causingError).String(), + Error: fmt.Sprintf("%+v", causingError), + SQLSTATE: pgerror.GetPGCode(causingError).String(), + LatencyNanos: timeutil.Since(startTime).Nanoseconds(), }) }) if err != nil || alreadyReversed { diff --git a/pkg/sql/schemachanger/scdeps/sctestdeps/test_deps.go b/pkg/sql/schemachanger/scdeps/sctestdeps/test_deps.go index a0733008ce85..f193caa55abd 100644 --- a/pkg/sql/schemachanger/scdeps/sctestdeps/test_deps.go +++ b/pkg/sql/schemachanger/scdeps/sctestdeps/test_deps.go @@ -1299,6 +1299,9 @@ func (s *TestState) logEvent(event logpb.EventPayload) error { // Remove common details from text output, they're never decorated. if inM, ok := in.(map[string]interface{}); ok { delete(inM, "common") + + // Also remove latency measurement, since it's not deterministic. + delete(inM, "latencyNanos") } }) if err != nil { diff --git a/pkg/sql/schemachanger/scjob/job.go b/pkg/sql/schemachanger/scjob/job.go index e0bfbb540315..145782940e75 100644 --- a/pkg/sql/schemachanger/scjob/job.go +++ b/pkg/sql/schemachanger/scjob/job.go @@ -141,11 +141,13 @@ func (n *newSchemaChangeResumer) run(ctx context.Context, execCtxI interface{}) return nil } + startTime := timeutil.FromUnixMicros(payload.StartedMicros) err := scrun.RunSchemaChangesInJob( ctx, execCfg.DeclarativeSchemaChangerTestingKnobs, n.deps, n.job.ID(), + startTime, payload.DescriptorIDs, n.rollbackCause, ) diff --git a/pkg/sql/schemachanger/scrun/BUILD.bazel b/pkg/sql/schemachanger/scrun/BUILD.bazel index a3e854519beb..8506af88cd71 100644 --- a/pkg/sql/schemachanger/scrun/BUILD.bazel +++ b/pkg/sql/schemachanger/scrun/BUILD.bazel @@ -29,6 +29,7 @@ go_library( "//pkg/util/log/logpb", "//pkg/util/mon", "//pkg/util/protoutil", + "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_redact//:redact", ], diff --git a/pkg/sql/schemachanger/scrun/scrun.go b/pkg/sql/schemachanger/scrun/scrun.go index 6f3ca918fe9f..2c84a84c55f9 100644 --- a/pkg/sql/schemachanger/scrun/scrun.go +++ b/pkg/sql/schemachanger/scrun/scrun.go @@ -8,6 +8,7 @@ package scrun import ( "context" "fmt" + "time" "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/jobs" @@ -27,6 +28,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log/eventpb" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" ) @@ -112,6 +114,7 @@ func RunSchemaChangesInJob( knobs *scexec.TestingKnobs, deps JobRunDependencies, jobID jobspb.JobID, + jobStartTime time.Time, descriptorIDs []descpb.ID, rollbackCause error, ) error { @@ -120,7 +123,7 @@ func RunSchemaChangesInJob( return err } } - p, err := makePostCommitPlan(ctx, deps, jobID, descriptorIDs, rollbackCause) + p, err := makePostCommitPlan(ctx, deps, jobID, jobStartTime, descriptorIDs, rollbackCause) if err != nil { if knobs != nil && knobs.OnPostCommitPlanError != nil { return 
knobs.OnPostCommitPlanError(err) @@ -141,9 +144,13 @@ func RunSchemaChangesInJob( if i+1 == len(p.Stages) { var template eventpb.EventWithCommonSchemaChangePayload if p.CurrentState.InRollback { - template = &eventpb.FinishSchemaChangeRollback{} + template = &eventpb.FinishSchemaChangeRollback{ + LatencyNanos: timeutil.Since(jobStartTime).Nanoseconds(), + } } else { - template = &eventpb.FinishSchemaChange{} + template = &eventpb.FinishSchemaChange{ + LatencyNanos: timeutil.Since(jobStartTime).Nanoseconds(), + } } if err := logSchemaChangeEvents(ctx, el, p.CurrentState, template); err != nil { return err @@ -218,6 +225,7 @@ func makePostCommitPlan( ctx context.Context, deps JobRunDependencies, jobID jobspb.JobID, + jobStartTime time.Time, descriptorIDs []descpb.ID, rollbackCause error, ) (scplan.Plan, error) { @@ -252,8 +260,9 @@ func makePostCommitPlan( // Revert the schema change and write about it in the event log. state.Rollback() return logSchemaChangeEvents(ctx, eventLogger, state, &eventpb.ReverseSchemaChange{ - Error: fmt.Sprintf("%v", rollbackCause), - SQLSTATE: pgerror.GetPGCode(rollbackCause).String(), + Error: fmt.Sprintf("%v", rollbackCause), + SQLSTATE: pgerror.GetPGCode(rollbackCause).String(), + LatencyNanos: timeutil.Since(jobStartTime).Nanoseconds(), }) } return nil diff --git a/pkg/sql/schemachanger/sctest/BUILD.bazel b/pkg/sql/schemachanger/sctest/BUILD.bazel index a99d07d7db44..36c659b7a865 100644 --- a/pkg/sql/schemachanger/sctest/BUILD.bazel +++ b/pkg/sql/schemachanger/sctest/BUILD.bazel @@ -53,6 +53,7 @@ go_library( "//pkg/util/log", "//pkg/util/mon", "//pkg/util/syncutil", + "//pkg/util/timeutil", "@com_github_cockroachdb_cockroach_go_v2//crdb", "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_cockroachdb_errors//:errors", diff --git a/pkg/sql/schemachanger/sctest/end_to_end.go b/pkg/sql/schemachanger/sctest/end_to_end.go index 201ca427812d..58074ac446ee 100644 --- a/pkg/sql/schemachanger/sctest/end_to_end.go +++ b/pkg/sql/schemachanger/sctest/end_to_end.go @@ -43,6 +43,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/datadriven" "github.com/stretchr/testify/require" ) @@ -312,6 +313,7 @@ func execStatementWithTestDeps( var state scpb.CurrentState var logSchemaChangesFn scbuild.LogSchemaChangerEventsFn var err error + startTime := timeutil.Now() deps.WithTxn(func(s *sctestdeps.TestState) { // Run statement phase. @@ -338,7 +340,7 @@ func execStatementWithTestDeps( deps.IncrementPhase() deps.LogSideEffectf("# begin %s", deps.Phase()) err = scrun.RunSchemaChangesInJob( - ctx, deps.TestingKnobs(), deps, jobID, job.DescriptorIDs, nil, /* rollbackCause */ + ctx, deps.TestingKnobs(), deps, jobID, startTime, job.DescriptorIDs, nil, /* rollbackCause */ ) require.NoError(t, err, "error in mock schema change job execution") deps.LogSideEffectf("# end %s", deps.Phase()) diff --git a/pkg/util/log/eventpb/ddl_events.proto b/pkg/util/log/eventpb/ddl_events.proto index fdb0575a60b4..8a10b867eb80 100644 --- a/pkg/util/log/eventpb/ddl_events.proto +++ b/pkg/util/log/eventpb/ddl_events.proto @@ -421,6 +421,8 @@ message ReverseSchemaChange { string error = 4 [(gogoproto.jsontag) = ",omitempty"]; // The SQLSTATE code for the error. 
string sqlstate = 5 [(gogoproto.customname) = "SQLSTATE", (gogoproto.jsontag) = ",omitempty", (gogoproto.moretags) = "redact:\"nonsensitive\""]; + // The amount of time the schema change job took before being reverted. + int64 latency_nanos = 6 [(gogoproto.jsontag) = ",omitempty"]; } // FinishSchemaChange is recorded when a previously initiated schema @@ -428,6 +430,8 @@ message ReverseSchemaChange { message FinishSchemaChange { CommonEventDetails common = 1 [(gogoproto.nullable) = false, (gogoproto.jsontag) = "", (gogoproto.embed) = true]; CommonSchemaChangeEventDetails sc = 2 [(gogoproto.nullable) = false, (gogoproto.jsontag) = "", (gogoproto.embed) = true]; + // The amount of time the schema change job took to complete. + int64 latency_nanos = 3 [(gogoproto.jsontag) = ",omitempty"]; } // FinishSchemaChangeRollback is recorded when a previously @@ -435,6 +439,8 @@ message FinishSchemaChange { message FinishSchemaChangeRollback { CommonEventDetails common = 1 [(gogoproto.nullable) = false, (gogoproto.jsontag) = "", (gogoproto.embed) = true]; CommonSchemaChangeEventDetails sc = 2 [(gogoproto.nullable) = false, (gogoproto.jsontag) = "", (gogoproto.embed) = true]; + // The amount of time the schema change job took to rollback. + int64 latency_nanos = 3 [(gogoproto.jsontag) = ",omitempty"]; } diff --git a/pkg/util/log/eventpb/json_encode_generated.go b/pkg/util/log/eventpb/json_encode_generated.go index 7e5c74b65539..6a4af9353536 100644 --- a/pkg/util/log/eventpb/json_encode_generated.go +++ b/pkg/util/log/eventpb/json_encode_generated.go @@ -3145,6 +3145,15 @@ func (m *FinishSchemaChange) AppendJSONFields(printComma bool, b redact.Redactab printComma, b = m.CommonSchemaChangeEventDetails.AppendJSONFields(printComma, b) + if m.LatencyNanos != 0 { + if printComma { + b = append(b, ',') + } + printComma = true + b = append(b, "\"LatencyNanos\":"...) + b = strconv.AppendInt(b, int64(m.LatencyNanos), 10) + } + return printComma, b } @@ -3155,6 +3164,15 @@ func (m *FinishSchemaChangeRollback) AppendJSONFields(printComma bool, b redact. printComma, b = m.CommonSchemaChangeEventDetails.AppendJSONFields(printComma, b) + if m.LatencyNanos != 0 { + if printComma { + b = append(b, ',') + } + printComma = true + b = append(b, "\"LatencyNanos\":"...) + b = strconv.AppendInt(b, int64(m.LatencyNanos), 10) + } + return printComma, b } @@ -4286,6 +4304,15 @@ func (m *ReverseSchemaChange) AppendJSONFields(printComma bool, b redact.Redacta b = append(b, '"') } + if m.LatencyNanos != 0 { + if printComma { + b = append(b, ',') + } + printComma = true + b = append(b, "\"LatencyNanos\":"...) + b = strconv.AppendInt(b, int64(m.LatencyNanos), 10) + } + return printComma, b }
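
For illustration, a minimal standalone Go sketch of the allocation pattern that patch 2 applies in getCleanupFunc: when no extra cleanup callbacks were registered during planning, return the release method value directly instead of always building a one-element slice and a wrapping closure. The planningCtx type and its field names below are hypothetical stand-ins for the real PlanningCtx, used only to show the shape of the change.

    package main

    import "fmt"

    // planningCtx is an illustrative stand-in for sql.PlanningCtx.
    type planningCtx struct {
        release       func()   // analogous to infra.Release
        onFlowCleanup []func() // extra cleanup callbacks registered during planning
    }

    // getCleanupFunc mirrors the shape of the change: with no extra callbacks,
    // hand back the release function itself and skip the closure allocation.
    func (p *planningCtx) getCleanupFunc() func() {
        if len(p.onFlowCleanup) == 0 {
            return p.release
        }
        return func() {
            p.release()
            for _, f := range p.onFlowCleanup {
                f()
            }
        }
    }

    func main() {
        p := &planningCtx{release: func() { fmt.Println("released infra") }}
        cleanup := p.getCleanupFunc()
        cleanup() // prints "released infra"; no wrapping closure was allocated
    }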
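
Similarly, a small sketch of how the LatencyNanos value in patch 3 is derived: the schema change job's start time is stored as Unix microseconds in the job payload, and the event field records the elapsed time in nanoseconds. The helper name latencyNanosSince is made up for this example; the real code goes through timeutil.FromUnixMicros and timeutil.Since rather than the standard library directly.

    package main

    import (
        "fmt"
        "time"
    )

    // latencyNanosSince converts a job start timestamp in Unix microseconds
    // (as stored in the jobs payload) into elapsed nanoseconds, the unit used
    // by the LatencyNanos event field.
    func latencyNanosSince(startedMicros int64, now time.Time) int64 {
        start := time.UnixMicro(startedMicros)
        return now.Sub(start).Nanoseconds()
    }

    func main() {
        started := time.Now().Add(-90 * time.Second).UnixMicro()
        fmt.Println(latencyNanosSince(started, time.Now())) // roughly 9e10 (90s in nanoseconds)
    }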