From ca0d2ed14e304fb445200fce1a4bdf7d11185f40 Mon Sep 17 00:00:00 2001 From: DarrylWong Date: Mon, 8 Jan 2024 14:37:10 -0500 Subject: [PATCH 1/4] roachtest: assign adminui ports dynamically for virtual clusters This was originally removed in #115599 due to #114097 merging, but adminui was reverted in #117141 and mistakenly did not revert the special case for virtual clusters. Release note: None --- pkg/cmd/roachtest/option/options.go | 2 ++ pkg/cmd/roachtest/tests/multitenant_distsql.go | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cmd/roachtest/option/options.go b/pkg/cmd/roachtest/option/options.go index 48e0cee989a5..2e7bb5022fb0 100644 --- a/pkg/cmd/roachtest/option/options.go +++ b/pkg/cmd/roachtest/option/options.go @@ -63,6 +63,8 @@ func DefaultStartVirtualClusterOpts(tenantName string, sqlInstance int) StartOpt startOpts.RoachprodOpts.Target = install.StartServiceForVirtualCluster startOpts.RoachprodOpts.VirtualClusterName = tenantName startOpts.RoachprodOpts.SQLInstance = sqlInstance + // TODO(DarrylWong): remove once #117125 is addressed. + startOpts.RoachprodOpts.AdminUIPort = 0 return startOpts } diff --git a/pkg/cmd/roachtest/tests/multitenant_distsql.go b/pkg/cmd/roachtest/tests/multitenant_distsql.go index 35114817ab4c..0cf83d22cf77 100644 --- a/pkg/cmd/roachtest/tests/multitenant_distsql.go +++ b/pkg/cmd/roachtest/tests/multitenant_distsql.go @@ -42,8 +42,6 @@ func registerMultiTenantDistSQL(r registry.Registry) { CompatibleClouds: registry.AllExceptAWS, Suites: registry.Suites(registry.Nightly), Leases: registry.MetamorphicLeases, - // TODO(#117150): unskip when #117150 is fixed. - Skip: "#117150", Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runMultiTenantDistSQL(ctx, t, c, numInstances, b == "on", to) }, From d91c9251ea0fb2286b379909de91239321af6aa8 Mon Sep 17 00:00:00 2001 From: Pavel Kalinnikov Date: Tue, 9 Jan 2024 15:49:06 +0000 Subject: [PATCH 2/4] rpc: rm rangefeed RPC stream window special case The rangefeed stream window size tuning was introduced to mitigate OOM in rangefeeds caused by the excessive number of streams (one per Range). Since we now use mux rangefeeds (which multiplexes all the rangefeed traffic into a single stream), this setting is no longer needed, so this commit removes it. Release note (ops change): COCKROACH_RANGEFEED_RPC_INITIAL_WINDOW_SIZE env variable has been removed, and rangefeed connection now uses the same window size as other RPC connections. --- pkg/rpc/context.go | 10 ++++------ pkg/rpc/settings.go | 10 ---------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/pkg/rpc/context.go b/pkg/rpc/context.go index 2646c8e74b7b..d6cfd9afe83c 100644 --- a/pkg/rpc/context.go +++ b/pkg/rpc/context.go @@ -1785,12 +1785,10 @@ func (rpcCtx *Context) dialOptsCommon( dialOpts = append(dialOpts, grpc.WithDisableRetry()) // Configure the window sizes with optional env var overrides. - dialOpts = append(dialOpts, grpc.WithInitialConnWindowSize(rpcCtx.initialConnWindowSize(ctx))) - if class == RangefeedClass { - dialOpts = append(dialOpts, grpc.WithInitialWindowSize(rpcCtx.rangefeedInitialWindowSize(ctx))) - } else { - dialOpts = append(dialOpts, grpc.WithInitialWindowSize(rpcCtx.initialWindowSize(ctx))) - } + dialOpts = append(dialOpts, + grpc.WithInitialConnWindowSize(rpcCtx.initialConnWindowSize(ctx)), + grpc.WithInitialWindowSize(rpcCtx.initialWindowSize(ctx)), + ) unaryInterceptors := rpcCtx.clientUnaryInterceptors unaryInterceptors = unaryInterceptors[:len(unaryInterceptors):len(unaryInterceptors)] if rpcCtx.Knobs.UnaryClientInterceptor != nil { diff --git a/pkg/rpc/settings.go b/pkg/rpc/settings.go index f3aa6eaaa22f..a3a19678788a 100644 --- a/pkg/rpc/settings.go +++ b/pkg/rpc/settings.go @@ -81,8 +81,6 @@ type windowSizeSettings struct { initialWindowSize int32 // initialConnWindowSize is the initial window size for a connection. initialConnWindowSize int32 - // rangefeedInitialWindowSize is the initial window size for a RangeFeed RPC. - rangefeedInitialWindowSize int32 } } @@ -94,8 +92,6 @@ func (s *windowSizeSettings) maybeInit(ctx context.Context) { if s.values.initialConnWindowSize > maximumWindowSize { s.values.initialConnWindowSize = maximumWindowSize } - s.values.rangefeedInitialWindowSize = getWindowSize(ctx, - "COCKROACH_RANGEFEED_RPC_INITIAL_WINDOW_SIZE", RangefeedClass, 2*defaultWindowSize /* 128KB */) }) } @@ -111,12 +107,6 @@ func (s *windowSizeSettings) initialConnWindowSize(ctx context.Context) int32 { return s.values.initialConnWindowSize } -// For a RangeFeed RPC. -func (s *windowSizeSettings) rangefeedInitialWindowSize(ctx context.Context) int32 { - s.maybeInit(ctx) - return s.values.rangefeedInitialWindowSize -} - // sourceAddr is the environment-provided local address for outgoing // connections. var sourceAddr = func() net.Addr { From 480882fdb6e7cb544e0e36ab97ab4c89f9e43c0a Mon Sep 17 00:00:00 2001 From: Jeff Date: Tue, 9 Jan 2024 16:36:04 +0000 Subject: [PATCH 3/4] sqlproxyccl: improve authentication throttle error The sql proxy will throttle connection attempts if a (client IP, tenant cluster) pair has too many authentication failures. The error is usually caused by a misconfigured password in a connection pool. This change replaces the "connection attempt throttled" error message with "too many failed authentication attempts". There is a hint that includes this message but not all drivers are configured to log hints. Fixes #117552 --- pkg/ccl/sqlproxyccl/authentication_test.go | 6 +++--- pkg/ccl/sqlproxyccl/proxy_handler.go | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/ccl/sqlproxyccl/authentication_test.go b/pkg/ccl/sqlproxyccl/authentication_test.go index 7428961762f6..b28a0b090565 100644 --- a/pkg/ccl/sqlproxyccl/authentication_test.go +++ b/pkg/ccl/sqlproxyccl/authentication_test.go @@ -121,7 +121,7 @@ func TestAuthenticateThrottled(t *testing.T) { require.Equal(t, msg, &pgproto3.ErrorResponse{ Severity: "FATAL", Code: "08C00", - Message: "codeProxyRefusedConnection: connection attempt throttled", + Message: "codeProxyRefusedConnection: too many failed authentication attempts", Hint: throttledErrorHint, }) @@ -142,10 +142,10 @@ func TestAuthenticateThrottled(t *testing.T) { _, err := authenticate(proxyToClient, proxyToServer, nil, /* proxyBackendKeyData */ func(status throttler.AttemptStatus) error { require.Equal(t, throttler.AttemptInvalidCredentials, status) - return throttledError + return authThrottledError }) require.Error(t, err) - require.Contains(t, err.Error(), "connection attempt throttled") + require.Contains(t, err.Error(), "too many failed authentication attempts") proxyToServer.Close() proxyToClient.Close() diff --git a/pkg/ccl/sqlproxyccl/proxy_handler.go b/pkg/ccl/sqlproxyccl/proxy_handler.go index 423e9404f9dc..e5c39c8a33c3 100644 --- a/pkg/ccl/sqlproxyccl/proxy_handler.go +++ b/pkg/ccl/sqlproxyccl/proxy_handler.go @@ -175,9 +175,9 @@ const throttledErrorHint string = `Connection throttling is triggered by repeate sure the username and password are correct. ` -var throttledError = errors.WithHint( +var authThrottledError = errors.WithHint( withCode(errors.New( - "connection attempt throttled"), codeProxyRefusedConnection), + "too many failed authentication attempts"), codeProxyRefusedConnection), throttledErrorHint) // newProxyHandler will create a new proxy handler with configuration based on @@ -432,7 +432,7 @@ func (handler *proxyHandler) handle(ctx context.Context, incomingConn net.Conn) throttleTime, err := handler.throttleService.LoginCheck(throttleTags) if err != nil { log.Errorf(ctx, "throttler refused connection: %v", err.Error()) - err = throttledError + err = authThrottledError updateMetricsAndSendErrToClient(err, fe.Conn, handler.metrics) return err } @@ -467,7 +467,7 @@ func (handler *proxyHandler) handle(ctx context.Context, incomingConn net.Conn) ctx, throttleTags, throttleTime, status, ); err != nil { log.Errorf(ctx, "throttler refused connection after authentication: %v", err.Error()) - return throttledError + return authThrottledError } return nil }, From f6519af5a0a2f0a68552070774398dc6d7b6f5b1 Mon Sep 17 00:00:00 2001 From: DarrylWong Date: Mon, 8 Jan 2024 14:39:47 -0500 Subject: [PATCH 4/4] roachtest: discover the adminUIPort for acceptance/gossip/restart-node-one This test relies on assumption `adminUIPort = SQLPort + 1` but adminui is temporarily hardcoded as 26258 while SQLPort will be dynamically assigned without the following override. This changes it to discover the port instead of relying on this assumption. Release note: None --- pkg/cmd/roachtest/cluster.go | 6 +++++ .../roachtest/cluster/cluster_interface.go | 3 ++- pkg/cmd/roachtest/tests/gossip.go | 9 ++++++-- pkg/roachprod/roachprod.go | 22 +++++++++++++++++++ 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index 7e39f9e08100..6bbe1fe2c3ff 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -2490,6 +2490,12 @@ func (c *clusterImpl) ExternalAdminUIAddr( return c.adminUIAddr(ctx, l, node, true) } +func (c *clusterImpl) AdminUIPorts( + ctx context.Context, l *logger.Logger, nodes option.NodeListOption, +) ([]int, error) { + return roachprod.AdminPorts(ctx, l, c.MakeNodes(nodes), c.IsSecure()) +} + func (c *clusterImpl) adminUIAddr( ctx context.Context, l *logger.Logger, node option.NodeListOption, external bool, ) ([]string, error) { diff --git a/pkg/cmd/roachtest/cluster/cluster_interface.go b/pkg/cmd/roachtest/cluster/cluster_interface.go index 2e7bce895d78..e0aba2bfd2cd 100644 --- a/pkg/cmd/roachtest/cluster/cluster_interface.go +++ b/pkg/cmd/roachtest/cluster/cluster_interface.go @@ -86,10 +86,11 @@ type Cluster interface { Conn(ctx context.Context, l *logger.Logger, node int, opts ...func(*option.ConnOption)) *gosql.DB ConnE(ctx context.Context, l *logger.Logger, node int, opts ...func(*option.ConnOption)) (*gosql.DB, error) - // URLs for the Admin UI. + // URLs and Ports for the Admin UI. InternalAdminUIAddr(ctx context.Context, l *logger.Logger, node option.NodeListOption) ([]string, error) ExternalAdminUIAddr(ctx context.Context, l *logger.Logger, node option.NodeListOption) ([]string, error) + AdminUIPorts(ctx context.Context, l *logger.Logger, node option.NodeListOption) ([]int, error) // Running commands on nodes. diff --git a/pkg/cmd/roachtest/tests/gossip.go b/pkg/cmd/roachtest/tests/gossip.go index ea3afd04665a..49ddac1f934a 100644 --- a/pkg/cmd/roachtest/tests/gossip.go +++ b/pkg/cmd/roachtest/tests/gossip.go @@ -425,13 +425,18 @@ SELECT count(replicas) t.L().Printf("killing all nodes\n") c.Stop(ctx, t.L(), option.DefaultStopOpts()) + adminPorts, err := c.AdminUIPorts(ctx, t.L(), c.Node(1)) + if err != nil { + t.Fatal(err) + } + // Restart node 1, but have it listen on a different port for internal // connections. This will require node 1 to reach out to the other nodes in // the cluster for gossip info. - err := c.RunE(ctx, c.Node(1), + err = c.RunE(ctx, c.Node(1), ` ./cockroach start --insecure --background --store={store-dir} `+ `--log-dir={log-dir} --cache=10% --max-sql-memory=10% `+ - `--listen-addr=:$[{pgport:1}+1000] --http-port=$[{pgport:1}+1] `+ + fmt.Sprintf(`--listen-addr=:$[{pgport:1}+1000] --http-port=%d `, adminPorts[0])+ `--join={pghost:1}:{pgport:1} `+ `--advertise-addr={pghost:1}:$[{pgport:1}+1000] `+ `> {log-dir}/cockroach.stdout 2> {log-dir}/cockroach.stderr`) diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 40914335e5f3..ea2f9be4f9b9 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -1074,6 +1074,28 @@ func AdminURL( return urlGenerator(ctx, c, l, c.TargetNodes(), uConfig) } +// AdminPorts finds the AdminUI ports for a cluster. +func AdminPorts( + ctx context.Context, l *logger.Logger, clusterName string, secure bool, +) ([]int, error) { + if err := LoadClusters(); err != nil { + return nil, err + } + c, err := newCluster(l, clusterName, install.SecureOption(secure)) + if err != nil { + return nil, err + } + var ports []int + for _, node := range c.Nodes { + port, err := c.NodeUIPort(ctx, node) + if err != nil { + return nil, errors.Wrapf(err, "Error discovering UI Port for node %d", node) + } + ports = append(ports, port) + } + return ports, nil +} + // PprofOpts specifies the options needed by Pprof(). type PprofOpts struct { Heap bool