Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…db#99369

99236: roachprod: retry GetInternalIP on error r=srosenberg a=smg260

Epic: none
Fixes: cockroachdb#98285, cockroachdb#98342

Release note: None

99240: sql: add computed columns to crdb_internal persisted sql stats views r=ericharmeling a=ericharmeling

Follows cockroachdb#99042, cockroachdb#99148.

This commit adds the following computed columns to both `crdb_internal.statement_statistics_persisted` and `crdb_internal.transaction_statistics_persisted` views:
-`execution_count`
-`service_latency`
-`cpu_sql_nanos`
-`contention_time`
-`total_estimated_execution_time`
-`p99_latency`

These computed columns are indexed in the system tables from which these views are created.

Epic: None

Release note: The `crdb_internal.statement_statistics_persisted` and `crdb_internal.transaction_statistics_persisted` views now include computed columns, to optimize the performance of frequently-queried expressions.

99318: multitenant: don't panic if reader doesn't exist yet r=knz,ajstorm,arulajmani a=stevendanna

While shared-process tenant servers are not likely to make requests
before the capability reader exists, the limiter factory looks up the
relevant tenant ID from the start key of the range descriptor and it
isn't unlikely that we'll see requests against tenant ranges before we
have a capability reader available.

Epic: none

Release note: None

Release justification: Low risk fix to avoid panics in tests.

99369: build: use same roachtest parallelism and cpuquota for AWS and GCE r=rickystewart a=renatolabs

The parallelism and cpuquota passed to AWS is much lower than that for GCE when invoking roachtest nightly builds. The AWS values have been imported from TeamCity ~3 years ago [1] and haven't changed since then. However, we are starting to see Roachtest Nightly builds time out on AWS [2] now that teams are starting to write more roachtests that run on AWS.

This commit removes the custom `PARALLELISM` and `CPUQUOTA` settings we had in place for AWS, making it consistent with GCE.

[1] see 8219a7f
[2] https://teamcity.cockroachdb.com/viewLog.html?buildId=9182737&buildTypeId=Cockroach_Nightlies_RoachtestNightlyAwsBazel

Epic: none

Release note: None

Co-authored-by: Miral Gadani <[email protected]>
Co-authored-by: Eric Harmeling <[email protected]>
Co-authored-by: Steven Danna <[email protected]>
Co-authored-by: Renato Costa <[email protected]>
  • Loading branch information
5 people committed Mar 23, 2023
5 parents 44098c4 + 9110629 + 1a03549 + e436016 + c7d6cf2 commit da20f04
Show file tree
Hide file tree
Showing 8 changed files with 516 additions and 123 deletions.
2 changes: 0 additions & 2 deletions build/teamcity/util/roachtest_util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ case "${CLOUD}" in
gce)
;;
aws)
PARALLELISM=3
CPUQUOTA=384
if [ -z "${TESTS}" ]; then
# NB: anchor ycsb to beginning of line to avoid matching `zfs/ycsb/*` which
# isn't supported on AWS at time of writing.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ go_library(
"//pkg/settings",
"//pkg/settings/cluster",
"//pkg/util/log",
"//pkg/util/log/logcrash",
"@com_github_cockroachdb_errors//:errors",
],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -240,11 +239,18 @@ func (a *Authorizer) elideCapabilityChecks(ctx context.Context, tenID roachpb.Te
// IsExemptFromRateLimiting returns true if the tenant is not subject to rate limiting.
func (a *Authorizer) IsExemptFromRateLimiting(ctx context.Context, tenID roachpb.TenantID) bool {
if a.elideCapabilityChecks(ctx, tenID) {
// By default, the system tenant is exempt from rate
// limiting and secondary tenants are not.
return tenID.IsSystem()
}

// Because tenant limiters are constructed based on the range
// bounds of the replica, requests from the system tenant that
// touch a tenant span will result in a query to this
// capability before we have a capability reader. We do not
// want to panic in that case since it is currently expected.
if a.capabilitiesReader == nil {
logcrash.ReportOrPanic(ctx, &a.settings.SV, "trying to perform capability check when no reader exists")
log.Warningf(ctx, "capability check for tenant %s before capability reader exists, assuming rate limit applies", tenID.String())
return false
}
if cp, found := a.capabilitiesReader.GetCapabilities(tenID); found {
Expand Down
19 changes: 14 additions & 5 deletions pkg/roachprod/install/cluster_synced.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,8 @@ tar cf - .ssh/id_rsa .ssh/id_rsa.pub .ssh/authorized_keys
ip, err = c.GetInternalIP(l, ctx, node)
if err != nil {
res.Err = errors.Wrapf(err, "pgurls")
return res, res.Err
// By returning a nil error here, we'll retry the command.
return res, nil
}
time.Sleep(time.Second)
}
Expand Down Expand Up @@ -1541,7 +1542,7 @@ func (c *SyncedCluster) createNodeCertArguments(

res.Stdout, res.Err = c.GetInternalIP(l, ctx, node)
ips[i] = res.Stdout
return res, errors.Wrapf(res.Err, "IPs")
return res, nil
}, DefaultSSHRetryOpts); err != nil {
return nil, err
}
Expand Down Expand Up @@ -2291,9 +2292,9 @@ func (c *SyncedCluster) pghosts(
res := &RunResultDetails{Node: node}
res.Stdout, res.Err = c.GetInternalIP(l, ctx, node)
ips[i] = res.Stdout
return res, errors.Wrapf(res.Err, "pghosts")
return res, nil
}, DefaultSSHRetryOpts); err != nil {
return nil, err
return nil, errors.Wrapf(err, "pghosts")
}

m := make(map[Node]string, len(ips))
Expand Down Expand Up @@ -2411,6 +2412,9 @@ type ParallelResult struct {
// cluster. If any of the commands fail, Parallel will log an error
// and exit the program.
//
// A user may also pass in a RunRetryOpts to control how the function is retried
// in the case of a failure.
//
// See ParallelE for more information.
func (c *SyncedCluster) Parallel(
l *logger.Logger,
Expand Down Expand Up @@ -2439,7 +2443,12 @@ func (c *SyncedCluster) Parallel(
// 0, then it defaults to `count`.
//
// The function returns a pointer to RunResultDetails as we may enrich
// the result with retry information (attempt number, wrapper error)
// the result with retry information (attempt number, wrapper error).
//
// RunRetryOpts controls the retry behavior in the case that
// the function fails, but returns a nil error. A non-nil error returned by the
// function denotes a roachprod error and will not be retried regardless of the
// retry options.
//
// If err is non-nil, the slice of ParallelResults will contain the
// results from any of the failed invocations.
Expand Down
6 changes: 2 additions & 4 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -469,13 +469,12 @@ func IP(
ips[i] = c.VMs[nodes[i]-1].PublicIP
}
} else {
var err error
if err := c.Parallel(l, "", len(nodes), 0, func(i int) (*install.RunResultDetails, error) {
node := nodes[i]
res := &install.RunResultDetails{Node: node}
res.Stdout, res.Err = c.GetInternalIP(l, ctx, node)
ips[i] = res.Stdout
return res, err
return res, nil
}, install.DefaultSSHRetryOpts); err != nil {
return nil, err
}
Expand Down Expand Up @@ -895,13 +894,12 @@ func PgURL(
ips[i] = c.VMs[nodes[i]-1].PublicIP
}
} else {
var err error
if err := c.Parallel(l, "", len(nodes), 0, func(i int) (*install.RunResultDetails, error) {
node := nodes[i]
res := &install.RunResultDetails{Node: node}
res.Stdout, res.Err = c.GetInternalIP(l, ctx, node)
ips[i] = res.Stdout
return res, err
return res, nil
}, install.DefaultSSHRetryOpts); err != nil {
return nil, err
}
Expand Down
28 changes: 26 additions & 2 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -6393,7 +6393,13 @@ CREATE VIEW crdb_internal.statement_statistics_persisted AS
statistics,
plan,
index_recommendations,
indexes_usage
indexes_usage,
execution_count,
service_latency,
cpu_sql_nanos,
contention_time,
total_estimated_execution_time,
p99_latency
FROM
system.statement_statistics`,
resultColumns: colinfo.ResultColumns{
Expand All @@ -6409,6 +6415,12 @@ CREATE VIEW crdb_internal.statement_statistics_persisted AS
{Name: "plan", Typ: types.Jsonb},
{Name: "index_recommendations", Typ: types.StringArray},
{Name: "indexes_usage", Typ: types.Jsonb},
{Name: "execution_count", Typ: types.Int},
{Name: "service_latency", Typ: types.Float},
{Name: "cpu_sql_nanos", Typ: types.Float},
{Name: "contention_time", Typ: types.Float},
{Name: "total_estimated_execution_time", Typ: types.Float},
{Name: "p99_latency", Typ: types.Float},
},
}

Expand Down Expand Up @@ -6623,7 +6635,13 @@ CREATE VIEW crdb_internal.transaction_statistics_persisted AS
node_id,
agg_interval,
metadata,
statistics
statistics,
execution_count,
service_latency,
cpu_sql_nanos,
contention_time,
total_estimated_execution_time,
p99_latency
FROM
system.transaction_statistics`,
resultColumns: colinfo.ResultColumns{
Expand All @@ -6634,6 +6652,12 @@ CREATE VIEW crdb_internal.transaction_statistics_persisted AS
{Name: "agg_interval", Typ: types.Interval},
{Name: "metadata", Typ: types.Jsonb},
{Name: "statistics", Typ: types.Jsonb},
{Name: "execution_count", Typ: types.Int},
{Name: "service_latency", Typ: types.Float},
{Name: "cpu_sql_nanos", Typ: types.Float},
{Name: "contention_time", Typ: types.Float},
{Name: "total_estimated_execution_time", Typ: types.Float},
{Name: "p99_latency", Typ: types.Float},
},
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog

Large diffs are not rendered by default.

Loading

0 comments on commit da20f04

Please sign in to comment.