From dc8c484ecb73d1e231d7fb858d1a267cdaf95a96 Mon Sep 17 00:00:00 2001
From: Austen McClernon
Date: Mon, 4 Dec 2023 21:10:37 +0000
Subject: [PATCH 1/4] roachtest: use protobuf instead of json for ts_util queries

`ts_util.go` provides helper functions for querying the cluster timeseries
database over http. These methods previously made use of `httputil.PostJSON`
for requesting timeseries.

This commit updates them to use `httputil.PostProtobuf`, in order to allow
optional fields, namely `TenantID`, to be ignored when the server doesn't
support them.

Epic: none

Release note: None
---
 .../tests/admission_control_tpcc_overload.go |  2 +-
 pkg/cmd/roachtest/tests/disk_stall.go        |  4 ++--
 pkg/cmd/roachtest/tests/rebalance_load.go    |  2 +-
 pkg/cmd/roachtest/tests/ts_util.go           | 20 +++++++++++--------
 4 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go
index c982a4ddb392..25857ecd9638 100644
--- a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go
+++ b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go
@@ -124,7 +124,7 @@ func verifyNodeLiveness(
 	if err := retry.WithMaxAttempts(ctx, retry.Options{
 		MaxBackoff: 500 * time.Millisecond,
 	}, 60, func() (err error) {
-		response, err = getMetrics(adminURLs[0], now.Add(-runDuration), now, []tsQuery{
+		response, err = getMetrics(ctx, adminURLs[0], now.Add(-runDuration), now, []tsQuery{
 			{
 				name:      "cr.node.liveness.heartbeatfailures",
 				queryType: total,
diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go
index fbfe94ec4484..b457045136e8 100644
--- a/pkg/cmd/roachtest/tests/disk_stall.go
+++ b/pkg/cmd/roachtest/tests/disk_stall.go
@@ -148,7 +148,7 @@ func runDiskStalledDetection(
 	}

 	stallAt := timeutil.Now()
-	response := mustGetMetrics(t, adminURL, workloadStartAt, stallAt, []tsQuery{
+	response := mustGetMetrics(ctx, t, adminURL, workloadStartAt, stallAt, []tsQuery{
 		{name: "cr.node.sql.query.count", queryType: total},
 	})
 	cum := response.Results[0].Datapoints
@@ -200,7 +200,7 @@ func runDiskStalledDetection(
 	{
 		now := timeutil.Now()
-		response := mustGetMetrics(t, adminURL, workloadStartAt, now, []tsQuery{
+		response := mustGetMetrics(ctx, t, adminURL, workloadStartAt, now, []tsQuery{
 			{name: "cr.node.sql.query.count", queryType: total},
 		})
 		cum := response.Results[0].Datapoints
diff --git a/pkg/cmd/roachtest/tests/rebalance_load.go b/pkg/cmd/roachtest/tests/rebalance_load.go
index 89b32a57db45..3e891ab07783 100644
--- a/pkg/cmd/roachtest/tests/rebalance_load.go
+++ b/pkg/cmd/roachtest/tests/rebalance_load.go
@@ -325,7 +325,7 @@ func makeStoreCPUFn(
 	return func(ctx context.Context) ([]float64, error) {
 		now := timeutil.Now()
 		resp, err := getMetricsWithSamplePeriod(
-			url, startTime, now, statSamplePeriod, tsQueries)
+			ctx, url, startTime, now, statSamplePeriod, tsQueries)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/cmd/roachtest/tests/ts_util.go b/pkg/cmd/roachtest/tests/ts_util.go
index 9031315c6f26..3330318b6643 100644
--- a/pkg/cmd/roachtest/tests/ts_util.go
+++ b/pkg/cmd/roachtest/tests/ts_util.go
@@ -48,9 +48,9 @@ type tsQuery struct {
 }

 func mustGetMetrics(
-	t test.Test, adminURL string, start, end time.Time, tsQueries []tsQuery,
+	ctx context.Context, t test.Test, adminURL string, start, end time.Time, tsQueries []tsQuery,
 ) tspb.TimeSeriesQueryResponse {
-	response, err := getMetrics(adminURL, start, end, tsQueries)
+	response, err := getMetrics(ctx, adminURL, start, end, tsQueries)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -58,13 +58,17 @@ func mustGetMetrics(
 }

 func getMetrics(
-	adminURL string, start, end time.Time, tsQueries []tsQuery,
+	ctx context.Context, adminURL string, start, end time.Time, tsQueries []tsQuery,
 ) (tspb.TimeSeriesQueryResponse, error) {
-	return getMetricsWithSamplePeriod(adminURL, start, end, defaultSamplePeriod, tsQueries)
+	return getMetricsWithSamplePeriod(ctx, adminURL, start, end, defaultSamplePeriod, tsQueries)
 }

 func getMetricsWithSamplePeriod(
-	adminURL string, start, end time.Time, samplePeriod time.Duration, tsQueries []tsQuery,
+	ctx context.Context,
+	adminURL string,
+	start, end time.Time,
+	samplePeriod time.Duration,
+	tsQueries []tsQuery,
 ) (tspb.TimeSeriesQueryResponse, error) {
 	url := "http://" + adminURL + "/ts/query"
 	queries := make([]tspb.Query, len(tsQueries))
@@ -99,7 +103,7 @@ func getMetricsWithSamplePeriod(
 		Queries: queries,
 	}
 	var response tspb.TimeSeriesQueryResponse
-	err := httputil.PostJSON(http.Client{Timeout: 500 * time.Millisecond}, url, &request, &response)
+	err := httputil.PostProtobuf(ctx, http.Client{Timeout: 500 * time.Millisecond}, url, &request, &response)
 	return response, err
 }

@@ -118,7 +122,7 @@ func verifyTxnPerSecond(
 		t.Fatal(err)
 	}
 	adminURL := adminUIAddrs[0]
-	response := mustGetMetrics(t, adminURL, start, end, []tsQuery{
+	response := mustGetMetrics(ctx, t, adminURL, start, end, []tsQuery{
 		{name: "cr.node.txn.commits", queryType: rate},
 		{name: "cr.node.txn.commits", queryType: total},
 	})
@@ -169,7 +173,7 @@ func verifyLookupsPerSec(
 		t.Fatal(err)
 	}
 	adminURL := adminUIAddrs[0]
-	response := mustGetMetrics(t, adminURL, start, end, []tsQuery{
+	response := mustGetMetrics(ctx, t, adminURL, start, end, []tsQuery{
 		{name: "cr.node.distsender.rangelookups", queryType: rate},
 	})

From 55e2d6f9ef0c27b776bca32822b27c539208252d Mon Sep 17 00:00:00 2001
From: Austen McClernon
Date: Mon, 4 Dec 2023 16:48:40 +0000
Subject: [PATCH 2/4] roachtest: port rebalance/by-load/*/mixed-version to new framework

Port `rebalance/by-load/leases/mixed-version` and
`rebalance/by-load/replicas/mixed-version` to the new mixed-version framework
as described in #110528.

Part of: #110528
Resolves: #115134
Resolves: #115135

Release note: None
---
 pkg/cmd/roachtest/tests/rebalance_load.go | 248 ++++++++++++----------
 1 file changed, 130 insertions(+), 118 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/rebalance_load.go b/pkg/cmd/roachtest/tests/rebalance_load.go
index 3e891ab07783..b600ad36ada2 100644
--- a/pkg/cmd/roachtest/tests/rebalance_load.go
+++ b/pkg/cmd/roachtest/tests/rebalance_load.go
@@ -20,11 +20,11 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
-	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/clusterupgrade"
+	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/mixedversion"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
 	"github.com/cockroachdb/cockroach/pkg/roachprod/install"
-	"github.com/cockroachdb/cockroach/pkg/testutils/release"
+	"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
 	"github.com/cockroachdb/errors"
 	"github.com/stretchr/testify/require"
@@ -82,134 +82,56 @@ func registerRebalanceLoad(r registry.Registry) {
 		appNode := c.Node(c.Spec().NodeCount)
 		numNodes := len(roachNodes)
 		numStores := numNodes
+		startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs,
+			"--vmodule=store_rebalancer=5,allocator=5,allocator_scorer=5,replicate_queue=5")
 		if c.Spec().SSDs > 1 && !c.Spec().RAID0 {
 			numStores *= c.Spec().SSDs
 			startOpts.RoachprodOpts.StoreCount = c.Spec().SSDs
 		}
-		// We want each store to end up with approximately storeToRangeFactor
-		// (factor) leases such that the CPU load is evenly spread, e.g.
-		// (n * factor) -1 splits = factor * n ranges = factor leases per store
-		// Note that we only assert on the CPU of each store w.r.t the mean, not
-		// the lease count.
-		splits := (numStores * storeToRangeFactor) - 1
-		startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs,
-			"--vmodule=store_rebalancer=5,allocator=5,allocator_scorer=5,replicate_queue=5")
+		settings := install.MakeClusterSettings()
+		settings.ClusterSettings["kv.allocator.load_based_rebalancing"] = rebalanceMode
+		settings.ClusterSettings["kv.range_split.by_load_enabled"] = "false"
+
 		if mixedVersion {
-			predecessorVersionStr, err := release.LatestPredecessor(t.BuildVersion())
-			require.NoError(t, err)
-			predecessorVersion := clusterupgrade.MustParseVersion(predecessorVersionStr)
-			settings.Binary = uploadCockroach(ctx, t, c, c.All(), predecessorVersion)
-			// Upgrade some (or all) of the first N-1 CRDB nodes. We ignore the last
-			// CRDB node (to leave at least one node on the older version), and the
-			// app node.
-			lastNodeToUpgrade := rand.Intn(c.Spec().NodeCount-2) + 1
-			t.L().Printf("upgrading %d nodes to the current cockroach binary", lastNodeToUpgrade)
-			nodesToUpgrade := c.Range(1, lastNodeToUpgrade)
-			c.Start(ctx, t.L(), startOpts, settings, roachNodes)
-			upgradeNodes(ctx, t, c, nodesToUpgrade, startOpts, clusterupgrade.CurrentVersion())
+			mvt := mixedversion.NewTest(ctx, t, t.L(), c, roachNodes, mixedversion.NeverUseFixtures,
+				// The http requests to the admin UI performed by the test don't play
+				// well with secure clusters. As of the time of writing, they return
+				// either of the following errors:
+				// tls: failed to verify certificate: x509: “node” certificate is not standards compliant
+				// tls: failed to verify certificate: x509: certificate signed by unknown authority
+				//
+				// Disable secure mode for simplicity.
+				mixedversion.ClusterSettingOption(install.SecureOption(false)),
+				mixedversion.ClusterSettingOption(install.ClusterSettingsOption(settings.ClusterSettings)),
+			)
+			mvt.InMixedVersion("rebalance load run",
+				func(ctx context.Context, l *logger.Logger, r *rand.Rand, h *mixedversion.Helper) error {
+					return rebalanceByLoad(
+						ctx, t, c, rebalanceMode, maxDuration, concurrency, appNode, numStores, numNodes)
+				})
+			mvt.Run()
 		} else {
+			// Enable collecting CPU profiles when the CPU utilization exceeds 90%.
+			// This helps debug failures which occur as a result of mismatches
+			// between allocation (QPS/replica CPU) and hardware signals e.g. see
+			// #111900. The setting names changed between v22.2 and v23.1, we can't
+			// easily setup CPU profiling in mixed version tests.
+			//
+			// TODO(kvoli): Remove this setup once CPU profiling is enabled by default
+			// on perf roachtests #97699.
+			settings.ClusterSettings["server.cpu_profile.duration"] = "2s"
+			settings.ClusterSettings["server.cpu_profile.interval"] = "2"
+			settings.ClusterSettings["server.cpu_profile.cpu_usage_combined_threshold"] = "90"
 			c.Start(ctx, t.L(), startOpts, settings, roachNodes)
+			require.NoError(t, rebalanceByLoad(
+				ctx, t, c, rebalanceMode, maxDuration,
+				concurrency, appNode, numStores, numNodes,
+			))
 		}
-		c.Put(ctx, t.DeprecatedWorkload(), "./workload", appNode)
-		c.Run(ctx, appNode, fmt.Sprintf("./workload init kv --drop --splits=%d {pgurl:1}", splits))
-
-		db := c.Conn(ctx, t.L(), 1)
-		defer db.Close()
-
-		require.NoError(t, WaitFor3XReplication(ctx, t, db))
-		t.Status("disable load based splitting")
-		require.NoError(t, disableLoadBasedSplitting(ctx, db))
-		t.Status(fmt.Sprintf("setting rebalance mode to %s", rebalanceMode))
-		_, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.allocator.load_based_rebalancing=$1::string`, rebalanceMode)
-		require.NoError(t, err)
-		// Enable collecting CPU profiles when the CPU utilization exceeds 90%.
-		// This helps debug failures which occur as a result of mismatches
-		// between allocation (QPS/replica CPU) and hardware signals e.g. see
-		// #111900.
-		//
-		// TODO(kvoli): Remove this setup once CPU profiling is enabled by default
-		// on perf roachtests #97699.
-		_, err = db.ExecContext(ctx, `SET CLUSTER SETTING server.cpu_profile.duration = '2s'`)
-		require.NoError(t, err)
-		_, err = db.ExecContext(ctx, `SET CLUSTER SETTING server.cpu_profile.interval = '2m'`)
-		require.NoError(t, err)
-		_, err = db.ExecContext(ctx, `SET CLUSTER SETTING server.cpu_profile.cpu_usage_combined_threshold = 90`)
-		require.NoError(t, err)
-
-		var m *errgroup.Group // see comment in version.go
-		m, ctx = errgroup.WithContext(ctx)
-
-		// Enable us to exit out of workload early when we achieve the desired CPU
-		// balance. This drastically shortens the duration of the test in the
-		// common case.
-		ctx, cancel := context.WithCancel(ctx)
-
-		m.Go(func() error {
-			t.L().Printf("starting load generator\n")
-			err := c.RunE(ctx, appNode, fmt.Sprintf(
-				"./workload run kv --read-percent=95 --tolerate-errors --concurrency=%d "+
-					"--duration=%v {pgurl:1-%d}",
-				concurrency, maxDuration, len(roachNodes)))
-			if errors.Is(ctx.Err(), context.Canceled) {
-				// We got canceled either because CPU balance was achieved or the
-				// other worker hit an error. In either case, it's not this worker's
-				// fault.
-				return nil
-			}
-			return err
-		})
-
-		m.Go(func() error {
-			t.Status("checking for CPU balance")
-
-			storeCPUFn, err := makeStoreCPUFn(ctx, c, t, numNodes, numStores)
-			if err != nil {
-				return err
-			}
-
-			var reason string
-			var balancedStartTime time.Time
-			var prevIsBalanced bool
-			for tBegin := timeutil.Now(); timeutil.Since(tBegin) <= maxDuration; {
-				// Wait out the sample period initially to allow the timeseries to
-				// populate meaningful information for the test to query.
-				select {
-				case <-ctx.Done():
-					return ctx.Err()
-				case <-time.After(statSamplePeriod):
-				}
-
-				now := timeutil.Now()
-				clusterStoresCPU, err := storeCPUFn(ctx)
-				if err != nil {
-					t.L().Printf("unable to get the cluster stores CPU %s\n", err.Error())
-					continue
-				}
-				var curIsBalanced bool
-				curIsBalanced, reason = isLoadEvenlyDistributed(clusterStoresCPU, meanCPUTolerance)
-				t.L().Printf("cpu %s", reason)
-				if !prevIsBalanced && curIsBalanced {
-					balancedStartTime = now
-				}
-				prevIsBalanced = curIsBalanced
-				if prevIsBalanced && now.Sub(balancedStartTime) > stableDuration {
-					t.Status("successfully achieved CPU balance; waiting for kv to finish running")
-					cancel()
-					return nil
-				}
-			}
-
-			return errors.Errorf("CPU not evenly balanced after timeout: %s", reason)
-		})
-		if err := m.Wait(); err != nil {
-			t.Fatal(err)
-		}
 	}

-	concurrency := 128
-
 	r.Add(
 		registry.TestSpec{
 			Name:    `rebalance/by-load/leases`,
@@ -301,6 +223,96 @@ func registerRebalanceLoad(r registry.Registry) {
 	)
 }

+func rebalanceByLoad(
+	ctx context.Context,
+	t test.Test,
+	c cluster.Cluster,
+	rebalanceMode string,
+	maxDuration time.Duration,
+	concurrency int,
+	appNode option.NodeListOption,
+	numStores, numNodes int,
+) error {
+	// We want each store to end up with approximately storeToRangeFactor
+	// (factor) leases such that the CPU load is evenly spread, e.g.
+	// (n * factor) -1 splits = factor * n ranges = factor leases per store
+	// Note that we only assert on the CPU of each store w.r.t the mean, not
+	// the lease count.
+	splits := (numStores * storeToRangeFactor) - 1
+	c.Run(ctx, appNode, fmt.Sprintf("./cockroach workload init kv --drop --splits=%d {pgurl:1}", splits))
+
+	db := c.Conn(ctx, t.L(), 1)
+	defer db.Close()
+
+	require.NoError(t, WaitFor3XReplication(ctx, t, db))
+
+	var m *errgroup.Group
+	m, ctx = errgroup.WithContext(ctx)
+
+	// Enable us to exit out of workload early when we achieve the desired CPU
+	// balance. This drastically shortens the duration of the test in the
+	// common case.
+	ctx, cancel := context.WithCancel(ctx)
+
+	m.Go(func() error {
+		t.L().Printf("starting load generator\n")
+		err := c.RunE(ctx, appNode, fmt.Sprintf(
+			"./cockroach workload run kv --read-percent=95 --tolerate-errors --concurrency=%d "+
+				"--duration=%v {pgurl:1-%d}",
+			concurrency, maxDuration, numNodes))
+		if errors.Is(ctx.Err(), context.Canceled) {
+			// We got canceled either because CPU balance was achieved or the
+			// other worker hit an error. In either case, it's not this worker's
+			// fault.
+			return nil
+		}
+		return err
+	})
+
+	m.Go(func() error {
+		t.Status("checking for CPU balance")
+
+		storeCPUFn, err := makeStoreCPUFn(ctx, c, t, numNodes, numStores)
+		if err != nil {
+			return err
+		}
+
+		var reason string
+		var balancedStartTime time.Time
+		var prevIsBalanced bool
+		for tBegin := timeutil.Now(); timeutil.Since(tBegin) <= maxDuration; {
+			// Wait out the sample period initially to allow the timeseries to
+			// populate meaningful information for the test to query.
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case <-time.After(statSamplePeriod):
+			}
+
+			now := timeutil.Now()
+			clusterStoresCPU, err := storeCPUFn(ctx)
+			if err != nil {
+				t.L().Printf("unable to get the cluster stores CPU %s\n", err.Error())
+				continue
+			}
+			var curIsBalanced bool
+			curIsBalanced, reason = isLoadEvenlyDistributed(clusterStoresCPU, meanCPUTolerance)
+			t.L().Printf("cpu %s", reason)
+			if !prevIsBalanced && curIsBalanced {
+				balancedStartTime = now
+			}
+			prevIsBalanced = curIsBalanced
+			if prevIsBalanced && now.Sub(balancedStartTime) > stableDuration {
+				t.Status("successfully achieved CPU balance; waiting for kv to finish running")
+				cancel()
+				return nil
+			}
+		}
+		return errors.Errorf("CPU not evenly balanced after timeout: %s", reason)
+	})
+	return m.Wait()
+}
+
 // makeStoreCPUFn returns a function which can be called to gather the CPU of
 // the cluster stores. When there are multiple stores per node, stores on the
 // same node will report identical CPU.

From 25acf632106444e539fca01da6ddee54b67ec799 Mon Sep 17 00:00:00 2001
From: Rafi Shamim
Date: Sat, 18 Nov 2023 00:57:30 -0500
Subject: [PATCH 3/4] builtins: remove crdb_internal.gc_tenant

This is an internal function that was deprecated a while ago. It's no longer
used since DestroyTenant is used instead.

Release note: None
---
 pkg/sql/faketreeeval/evalctx.go         |  5 ---
 .../testdata/logic_test/tenant_builtins |  8 -----
 pkg/sql/sem/builtins/builtins.go        | 27 ----------------
 pkg/sql/sem/builtins/fixed_oids.go      |  1 -
 pkg/sql/sem/eval/deps.go                |  5 ---
 pkg/sql/tenant_gc.go                    | 31 -------------------
 6 files changed, 77 deletions(-)

diff --git a/pkg/sql/faketreeeval/evalctx.go b/pkg/sql/faketreeeval/evalctx.go
index 28dc1174845d..cd6806a0c760 100644
--- a/pkg/sql/faketreeeval/evalctx.go
+++ b/pkg/sql/faketreeeval/evalctx.go
@@ -657,11 +657,6 @@ func (c *DummyTenantOperator) DropTenantByID(
 	return errors.WithStack(errEvalTenant)
 }

-// GCTenant is part of the tree.TenantOperator interface.
-func (c *DummyTenantOperator) GCTenant(_ context.Context, _ uint64) error {
-	return errors.WithStack(errEvalTenant)
-}
-
 // UpdateTenantResourceLimits is part of the tree.TenantOperator interface.
 func (c *DummyTenantOperator) UpdateTenantResourceLimits(
 	_ context.Context,
diff --git a/pkg/sql/logictest/testdata/logic_test/tenant_builtins b/pkg/sql/logictest/testdata/logic_test/tenant_builtins
index 90e6358f9672..c32035bb2d9a 100644
--- a/pkg/sql/logictest/testdata/logic_test/tenant_builtins
+++ b/pkg/sql/logictest/testdata/logic_test/tenant_builtins
@@ -112,11 +112,6 @@ id active name
 10 true tenant-number-ten
 11 true tenant-number-eleven

-# Garbage collect a non-drop tenant fails.
-
-query error tenant 5 is not in data state DROP
-SELECT crdb_internal.gc_tenant(5)
-
 # Note this just marks the tenant as dropped but does not call GC.

 statement ok
@@ -273,9 +268,6 @@ SELECT crdb_internal.create_tenant('not-allowed')
 statement error user testuser does not have MANAGEVIRTUALCLUSTER system privilege
 DROP TENANT [1]

-statement error crdb_internal.gc_tenant\(\): user testuser does not have REPAIRCLUSTERMETADATA system privilege
-SELECT crdb_internal.gc_tenant(314)
-
 user root

 subtest avoid_tenant_id_reuse
diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go
index bf93f625212c..2129f307dff2 100644
--- a/pkg/sql/sem/builtins/builtins.go
+++ b/pkg/sql/sem/builtins/builtins.go
@@ -6798,33 +6798,6 @@ Parameters:` + randgencfg.ConfigDoc,
 		},
 	),

-	"crdb_internal.gc_tenant": makeBuiltin(
-		// TODO(jeffswenson): Delete crdb_internal.gc_tenant after the DestroyTenant
-		// changes are deployed to all Cockroach Cloud serverless hosts.
-		tree.FunctionProperties{
-			Category:     builtinconstants.CategoryMultiTenancy,
-			Undocumented: true,
-		},
-		tree.Overload{
-			Types: tree.ParamTypes{
-				{Name: "id", Typ: types.Int},
-			},
-			ReturnType: tree.FixedReturnType(types.Int),
-			Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) {
-				sTenID, err := mustBeDIntInTenantRange(args[0])
-				if err != nil {
-					return nil, err
-				}
-				if err := evalCtx.Tenant.GCTenant(ctx, uint64(sTenID)); err != nil {
-					return nil, err
-				}
-				return args[0], nil
-			},
-			Info:       "Garbage collects a tenant with the provided ID. Must be run by the System tenant.",
-			Volatility: volatility.Volatile,
-		},
-	),
-
 	// Used to configure the tenant token bucket. See UpdateTenantResourceLimits.
 	"crdb_internal.update_tenant_resource_limits": makeBuiltin(
 		tree.FunctionProperties{
diff --git a/pkg/sql/sem/builtins/fixed_oids.go b/pkg/sql/sem/builtins/fixed_oids.go
index e8197451764a..367f7fafbf77 100644
--- a/pkg/sql/sem/builtins/fixed_oids.go
+++ b/pkg/sql/sem/builtins/fixed_oids.go
@@ -1328,7 +1328,6 @@ var builtinOidsArray = []string{
 	1351: `crdb_internal.unsafe_delete_namespace_entry(parent_id: int, parent_schema_id: int, name: string, desc_id: int) -> bool`,
 	1352: `crdb_internal.unsafe_delete_namespace_entry(parent_id: int, parent_schema_id: int, name: string, desc_id: int, force: bool) -> bool`,
 	1353: `crdb_internal.sql_liveness_is_alive(session_id: bytes) -> bool`,
-	1354: `crdb_internal.gc_tenant(id: int) -> int`,
 	1355: `crdb_internal.update_tenant_resource_limits(tenant_id: int, available_request_units: float, refill_rate: float, max_burst_request_units: float, as_of: timestamp, as_of_consumed_request_units: float) -> int`,
 	1356: `crdb_internal.compact_engine_span(node_id: int, store_id: int, start_key: bytes, end_key: bytes) -> bool`,
 	1357: `crdb_internal.increment_feature_counter(feature: string) -> bool`,
diff --git a/pkg/sql/sem/eval/deps.go b/pkg/sql/sem/eval/deps.go
index c3ef469af8e0..6a7c9dacd3bc 100644
--- a/pkg/sql/sem/eval/deps.go
+++ b/pkg/sql/sem/eval/deps.go
@@ -649,11 +649,6 @@ type TenantOperator interface {
 	// the gc job will not wait for a GC ttl.
 	DropTenantByID(ctx context.Context, tenantID uint64, synchronous, ignoreServiceMode bool) error

-	// GCTenant attempts to garbage collect a DROP tenant from the system. Upon
-	// success it also removes the tenant record.
-	// It returns an error if the tenant does not exist.
-	GCTenant(ctx context.Context, tenantID uint64) error
-
 	// LookupTenantID returns the ID for the given tenant name.
 	LookupTenantID(ctx context.Context, tenantName roachpb.TenantName) (roachpb.TenantID, error)
diff --git a/pkg/sql/tenant_gc.go b/pkg/sql/tenant_gc.go
index a54a59858cfd..d8bc0225235a 100644
--- a/pkg/sql/tenant_gc.go
+++ b/pkg/sql/tenant_gc.go
@@ -20,9 +20,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
 	"github.com/cockroachdb/cockroach/pkg/spanconfig"
 	"github.com/cockroachdb/cockroach/pkg/sql/isql"
-	"github.com/cockroachdb/cockroach/pkg/sql/privilege"
 	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
-	"github.com/cockroachdb/cockroach/pkg/sql/syntheticprivilege"
 	"github.com/cockroachdb/cockroach/pkg/util/hlc"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
 	"github.com/cockroachdb/errors"
@@ -124,32 +122,3 @@ func clearTenant(ctx context.Context, execCfg *ExecutorConfig, info *mtinfopb.Te

 	return errors.Wrapf(execCfg.DB.Run(ctx, b), "clearing tenant %d data", info.ID)
 }
-
-// GCTenant implements the tree.TenantOperator interface.
-// This is the function used by the crdb_internal.gc_tenant built-in function.
-// It garbage-collects a tenant already in the DROP state.
-//
-// TODO(jeffswenson): Delete crdb_internal.gc_tenant after the DestroyTenant
-// changes are deployed to all Cockroach Cloud serverless hosts.
-func (p *planner) GCTenant(ctx context.Context, tenID uint64) error {
-	if !p.extendedEvalCtx.TxnIsSingleStmt {
-		return errors.Errorf("gc_tenant cannot be used inside a multi-statement transaction")
-	}
-	if err := p.CheckPrivilege(ctx, syntheticprivilege.GlobalPrivilegeObject, privilege.REPAIRCLUSTERMETADATA); err != nil {
-		return err
-	}
-	info, err := GetTenantRecordByID(ctx, p.InternalSQLTxn(), roachpb.MustMakeTenantID(tenID), p.ExecCfg().Settings)
-	if err != nil {
-		return errors.Wrapf(err, "retrieving tenant %d", tenID)
-	}
-
-	// Confirm tenant is ready to be cleared.
-	if info.DataState != mtinfopb.DataStateDrop {
-		return errors.Errorf("tenant %d is not in data state DROP", info.ID)
-	}
-
-	_, err = createGCTenantJob(
-		ctx, p.ExecCfg().JobRegistry, p.InternalSQLTxn(), p.User(), tenID, false, /* synchronous */
-	)
-	return err
-}

From 9452d4baaacdcd0a3358ab7e4c9b1f8af854e455 Mon Sep 17 00:00:00 2001
From: Ricky Stewart
Date: Wed, 6 Dec 2023 15:44:10 -0600
Subject: [PATCH 4/4] workflows: update to run unit tests in GitHub Actions job

Epic: CRDB-8308

Release note: None
---
 .../workflows/experimental-github-actions-testing.yml | 11 ++++++++++-
 build/github/get-engflow-keys.sh                      |  6 ++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100755 build/github/get-engflow-keys.sh

diff --git a/.github/workflows/experimental-github-actions-testing.yml b/.github/workflows/experimental-github-actions-testing.yml
index edecb8009278..324f1fa3addd 100644
--- a/.github/workflows/experimental-github-actions-testing.yml
+++ b/.github/workflows/experimental-github-actions-testing.yml
@@ -7,4 +7,13 @@ jobs:
   test_job:
     runs-on: [self-hosted, basic_runner_group]
     steps:
-      - run: echo hi
+      - uses: actions/checkout@v4
+        with:
+          # By default, checkout merges the PR into the current master.
+          # Instead, we want to check out the PR as is.
+          ref: ${{ github.event.pull_request.head.sha }}
+      - run: ./build/github/get-engflow-keys.sh
+      - run: bazel test //pkg:all_tests --config engflowpublic --config crosslinux --jobs 300 --tls_client_certificate=/home/agent/engflow.crt --tls_client_key=/home/agent/engflow.key --remote_download_minimal
+      - name: clean up
+        if: always()
+        run: rm -f /home/agent/engflow.*
diff --git a/build/github/get-engflow-keys.sh b/build/github/get-engflow-keys.sh
new file mode 100755
index 000000000000..eb535586fe9e
--- /dev/null
+++ b/build/github/get-engflow-keys.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+set -euxo pipefail
+
+gcloud secrets versions access 1 --secret=engflow-mesolite-key > /home/agent/engflow.key
+gcloud secrets versions access 1 --secret=engflow-mesolite-crt > /home/agent/engflow.crt