From f0da5e8ec3dbc27bbba43efa7ed8322212d2e7c9 Mon Sep 17 00:00:00 2001 From: Herko Lategan Date: Tue, 13 Jun 2023 14:49:26 +0100 Subject: [PATCH 1/2] roachprod: fix confusing start-up error Starting a cluster locally does a check for certificates. In the event the certificates are not found, which is a valid case, an error is printed. The start command works correctly, but the error can cause confusion: ```bash /bin/roachprod start local --secure 12:47:56 cluster_synced.go:2475: 0: COMMAND_PROBLEM: exit status 1 (1) COMMAND_PROBLEM Wraps: (2) exit status 1 Error types: (1) errors.Cmd (2) *exec.ExitError: local: initializing certs 1/1 / local: distributing certs 2/2 ``` This change modifies the command to do a check without causing an error, and also propagates any real errors that could occur while doing the check. --- pkg/roachprod/install/cluster_synced.go | 40 +++++++++++-------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/pkg/roachprod/install/cluster_synced.go b/pkg/roachprod/install/cluster_synced.go index 895ccd3c949d..f130e346a074 100644 --- a/pkg/roachprod/install/cluster_synced.go +++ b/pkg/roachprod/install/cluster_synced.go @@ -1299,7 +1299,9 @@ const ( // DistributeCerts will generate and distribute certificates to all of the // nodes. func (c *SyncedCluster) DistributeCerts(ctx context.Context, l *logger.Logger) error { - if c.checkForCertificates(ctx, l) { + if found, err := c.checkForCertificates(ctx, l); err != nil { + return err + } else if found { return nil } @@ -1375,11 +1377,15 @@ tar cvf %[3]s certs func (c *SyncedCluster) DistributeTenantCerts( ctx context.Context, l *logger.Logger, hostCluster *SyncedCluster, tenantID int, ) error { - if hostCluster.checkForTenantCertificates(ctx, l) { + if found, err := hostCluster.checkForTenantCertificates(ctx, l); err != nil { + return err + } else if found { return nil } - if !hostCluster.checkForCertificates(ctx, l) { + if found, err := hostCluster.checkForCertificates(ctx, l); err != nil { + return err + } else if !found { return errors.New("host cluster missing certificate bundle") } @@ -1489,7 +1495,7 @@ func (c *SyncedCluster) getFileFromFirstNode( // checkForCertificates checks if the cluster already has a certs bundle created // on the first node. -func (c *SyncedCluster) checkForCertificates(ctx context.Context, l *logger.Logger) bool { +func (c *SyncedCluster) checkForCertificates(ctx context.Context, l *logger.Logger) (bool, error) { dir := "" if c.IsLocal() { dir = c.localVMDir(1) @@ -1499,7 +1505,9 @@ func (c *SyncedCluster) checkForCertificates(ctx context.Context, l *logger.Logg // checkForTenantCertificates checks if the cluster already has a tenant-certs bundle created // on the first node. -func (c *SyncedCluster) checkForTenantCertificates(ctx context.Context, l *logger.Logger) bool { +func (c *SyncedCluster) checkForTenantCertificates( + ctx context.Context, l *logger.Logger, +) (bool, error) { dir := "" if c.IsLocal() { dir = c.localVMDir(1) @@ -1509,24 +1517,10 @@ func (c *SyncedCluster) checkForTenantCertificates(ctx context.Context, l *logge func (c *SyncedCluster) fileExistsOnFirstNode( ctx context.Context, l *logger.Logger, path string, -) bool { - var existsErr error - display := fmt.Sprintf("%s: checking %s", c.Name, path) - if err := c.Parallel(ctx, l, 1, func(ctx context.Context, i int) (*RunResultDetails, error) { - node := c.Nodes[i] - sess := c.newSession(l, node, `test -e `+path) - defer sess.Close() - - out, cmdErr := sess.CombinedOutput(ctx) - res := newRunResultDetails(node, cmdErr) - res.CombinedOut = out - - existsErr = res.Err - return res, nil - }, WithDisplay(display)); err != nil { - return false - } - return existsErr == nil +) (bool, error) { + l.Printf("%s: checking %s", c.Name, path) + result, err := c.runCmdOnSingleNode(ctx, l, c.Nodes[0], `$(test -e `+path+`); echo $?`, false, l.Stdout, l.Stderr) + return result.Stdout == "0", err } // createNodeCertArguments returns a list of strings appropriate for use as From 06cd54a4e30bcec4120d564983506936f8203163 Mon Sep 17 00:00:00 2001 From: Herko Lategan Date: Tue, 13 Jun 2023 12:50:58 +0100 Subject: [PATCH 2/2] roachtest: remove unused create tenant options The `otherTenantIDs` field on `createTenantOptions` is no longer used. Epic: CRDB-23559 --- .../tests/admission_control_multitenant_fairness.go | 8 +------- pkg/cmd/roachtest/tests/multitenant_upgrade.go | 11 +++++------ pkg/cmd/roachtest/tests/multitenant_utils.go | 11 ----------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go b/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go index adf4deb245e2..80608b448682 100644 --- a/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go +++ b/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go @@ -203,11 +203,6 @@ func runMultiTenantFairness( // Create the tenants. t.L().Printf("initializing %d tenants (<%s)", numTenants, 5*time.Minute) - tenantIDs := make([]int, 0, numTenants) - for i := 0; i < numTenants; i++ { - tenantIDs = append(tenantIDs, tenantID(i)) - } - tenants := make([]*tenantNode, numTenants) for i := 0; i < numTenants; i++ { if !t.SkipInit() { @@ -216,8 +211,7 @@ func runMultiTenantFairness( } tenant := createTenantNode(ctx, t, c, - crdbNode, tenantID(i), tenantNodeID(i), tenantHTTPPort(i), tenantSQLPort(i), - createTenantOtherTenantIDs(tenantIDs)) + crdbNode, tenantID(i), tenantNodeID(i), tenantHTTPPort(i), tenantSQLPort(i)) defer tenant.stop(ctx, t, c) tenants[i] = tenant diff --git a/pkg/cmd/roachtest/tests/multitenant_upgrade.go b/pkg/cmd/roachtest/tests/multitenant_upgrade.go index 24e7390f1d15..ada4d32989a2 100644 --- a/pkg/cmd/roachtest/tests/multitenant_upgrade.go +++ b/pkg/cmd/roachtest/tests/multitenant_upgrade.go @@ -80,7 +80,6 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, settings := install.MakeClusterSettings(install.BinaryOption(predecessorBinary), install.SecureOption(true)) c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, kvNodes) - tenantStartOpt := createTenantOtherTenantIDs([]int{11, 12, 13, 14}) const tenant11aHTTPPort, tenant11aSQLPort = 8011, 20011 const tenant11bHTTPPort, tenant11bSQLPort = 8016, 20016 @@ -98,13 +97,13 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, // Create two instances of tenant 11 so that we can test with two pods // running during migration. const tenantNode = 2 - tenant11a := createTenantNode(ctx, t, c, kvNodes, tenant11ID, tenantNode, tenant11aHTTPPort, tenant11aSQLPort, tenantStartOpt) + tenant11a := createTenantNode(ctx, t, c, kvNodes, tenant11ID, tenantNode, tenant11aHTTPPort, tenant11aSQLPort) tenant11a.start(ctx, t, c, predecessorBinary) defer tenant11a.stop(ctx, t, c) // Since the certs are created with the createTenantNode call above, we // call the "no certs" version of create tenant here. - tenant11b := createTenantNodeNoCerts(ctx, t, c, kvNodes, tenant11ID, tenantNode, tenant11bHTTPPort, tenant11bSQLPort, tenantStartOpt) + tenant11b := createTenantNodeNoCerts(ctx, t, c, kvNodes, tenant11ID, tenantNode, tenant11bHTTPPort, tenant11bSQLPort) tenant11b.start(ctx, t, c, predecessorBinary) defer tenant11b.stop(ctx, t, c) @@ -159,7 +158,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, withResults([][]string{{"1", "bar"}})) t.Status("starting tenant 12 server with older binary") - tenant12 := createTenantNode(ctx, t, c, kvNodes, tenant12ID, tenantNode, tenant12HTTPPort, tenant12SQLPort, tenantStartOpt) + tenant12 := createTenantNode(ctx, t, c, kvNodes, tenant12ID, tenantNode, tenant12HTTPPort, tenant12SQLPort) tenant12.start(ctx, t, c, predecessorBinary) defer tenant12.stop(ctx, t, c) @@ -181,7 +180,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, runner.Exec(t, `SELECT crdb_internal.create_tenant($1::INT)`, tenant13ID) t.Status("starting tenant 13 server with new binary") - tenant13 := createTenantNode(ctx, t, c, kvNodes, tenant13ID, tenantNode, tenant13HTTPPort, tenant13SQLPort, tenantStartOpt) + tenant13 := createTenantNode(ctx, t, c, kvNodes, tenant13ID, tenantNode, tenant13HTTPPort, tenant13SQLPort) tenant13.start(ctx, t, c, currentBinary) defer tenant13.stop(ctx, t, c) @@ -332,7 +331,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, runner.Exec(t, `SELECT crdb_internal.create_tenant($1::INT)`, tenant14ID) t.Status("verifying that the tenant 14 server works and has the proper version") - tenant14 := createTenantNode(ctx, t, c, kvNodes, tenant14ID, tenantNode, tenant14HTTPPort, tenant14SQLPort, tenantStartOpt) + tenant14 := createTenantNode(ctx, t, c, kvNodes, tenant14ID, tenantNode, tenant14HTTPPort, tenant14SQLPort) tenant14.start(ctx, t, c, currentBinary) defer tenant14.stop(ctx, t, c) verifySQL(t, tenant14.pgURL, diff --git a/pkg/cmd/roachtest/tests/multitenant_utils.go b/pkg/cmd/roachtest/tests/multitenant_utils.go index 7c5df2dc16a3..d2a6611b62c4 100644 --- a/pkg/cmd/roachtest/tests/multitenant_utils.go +++ b/pkg/cmd/roachtest/tests/multitenant_utils.go @@ -53,22 +53,11 @@ type tenantNode struct { } type createTenantOptions struct { - // TODO(ssd): This is a hack to work around the currently tangled state of - // cluster management between roachtest and roachprod. createTenantNode - // recreates client certs. Only one copy of the client certs are cached - // locally, so if we want a client to work against multiple tenants in a - // single test, we need to create the certs with all tenants. - otherTenantIDs []int - // Set this to expand the scope of the nodes added to the tenant certs. certNodes option.NodeListOption } type createTenantOpt func(*createTenantOptions) -func createTenantOtherTenantIDs(ids []int) createTenantOpt { - return func(c *createTenantOptions) { c.otherTenantIDs = ids } -} - func createTenantCertNodes(nodes option.NodeListOption) createTenantOpt { return func(c *createTenantOptions) { c.certNodes = nodes } }