From 7af8852d86c892c5bc825c904170393d72d0db65 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Tue, 31 Oct 2023 18:12:02 -0400 Subject: [PATCH 1/3] roachtest: mark new npgsql tests as flaky These tests started flaking due to #108414. We will mark them as flaky and investigate later. Release note: None --- pkg/cmd/roachtest/tests/npgsql_blocklist.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/cmd/roachtest/tests/npgsql_blocklist.go b/pkg/cmd/roachtest/tests/npgsql_blocklist.go index 27ed5162c840..919ae9a42c11 100644 --- a/pkg/cmd/roachtest/tests/npgsql_blocklist.go +++ b/pkg/cmd/roachtest/tests/npgsql_blocklist.go @@ -64,6 +64,7 @@ var npgsqlBlocklist = blocklist{ `Npgsql.Tests.CommandTests(Multiplexing).Multiple_statements_with_dependencies`: "unknown", `Npgsql.Tests.CommandTests(Multiplexing).Non_standards_conforming_strings`: "unknown", `Npgsql.Tests.CommandTests(Multiplexing).Positional_parameters_are_not_supported_with_legacy_batching`: "unknown", + `Npgsql.Tests.CommandTests(Multiplexing).Reuse_command_with_different_parameter_placeholder_types`: "flaky", `Npgsql.Tests.CommandTests(NonMultiplexing).Cursor_statement`: "unknown", `Npgsql.Tests.CommandTests(NonMultiplexing).ExecuteNonQuery_Throws_PostgresException(False)`: "unknown", `Npgsql.Tests.CommandTests(NonMultiplexing).ExecuteNonQuery_Throws_PostgresException(True)`: "unknown", @@ -716,9 +717,12 @@ var npgsqlBlocklist = blocklist{ var npgsqlIgnoreList = blocklist{ `Npgsql.Tests.CommandTests(Multiplexing).Cursor_move_RecordsAffected `: "flaky", `Npgsql.Tests.CommandTests(Multiplexing).QueryNonQuery`: "flaky", + `Npgsql.Tests.CommandTests(Multiplexing).Same_command_different_param_instances`: "flaky", + `Npgsql.Tests.CommandTests(Multiplexing).Same_command_different_param_values`: "flaky", `Npgsql.Tests.CommandTests(Multiplexing).SingleNonQuery`: "flaky", `Npgsql.Tests.CommandTests(Multiplexing).SingleQuery`: "flaky", `Npgsql.Tests.CommandTests(Multiplexing).Statement_mapped_output_parameters(Default)`: "flaky", + `Npgsql.Tests.CommandTests(Multiplexing).TableDirect`: "flaky", `Npgsql.Tests.CommandTests(Multiplexing).Use_across_connection_change(NotPrepared)`: "flaky", `Npgsql.Tests.CommandTests(NonMultiplexing).Cached_command_clears_parameters_placeholder_type`: "flaky", `Npgsql.Tests.CommandTests(NonMultiplexing).CloseConnection_with_exception`: "flaky", @@ -771,4 +775,6 @@ var npgsqlIgnoreList = blocklist{ `Npgsql.Tests.TaskTimeoutAndCancellationTest.DelayedFaultedTaskCancellation("TimeoutOnly")`: "flaky", `Npgsql.Tests.TransactionTests(Multiplexing).Failed_transaction_on_close_with_custom_timeout`: "flaky", `Npgsql.Tests.TransactionTests(NonMultiplexing).CommitAsync(Prepared)`: "flaky", + `Npgsql.Tests.TransactionTests(NonMultiplexing).Rollback(Prepared)`: "flaky", + `Npgsql.Tests.TransactionTests(NonMultiplexing).RollbackAsync(NotPrepared)`: "flaky", } From a2e92c2db4972d69a9d48d776264a44356b61dfb Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 31 Oct 2023 16:58:19 -0700 Subject: [PATCH 2/3] server/profiler: mark ActiveQueryDumpsEnabled as SystemVisible This setting was miscategorized initially - the active query profiler runs in both storage and virtual clusters, so it should be at least SystemVisible, but maybe even ApplicationLevel. Release note: None --- docs/generated/settings/settings-for-tenants.txt | 1 + docs/generated/settings/settings.html | 2 +- pkg/server/profiler/cluster_settings.go | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index be6457ed0611..dca6bfc81b11 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -30,6 +30,7 @@ cloudstorage.timeout duration 10m0s the timeout for import/export storage operat cluster.auto_upgrade.enabled boolean true disable automatic cluster version upgrade until reset application cluster.organization string organization name system-visible cluster.preserve_downgrade_option string disable (automatic or manual) cluster version upgrade from the specified version until reset application +diagnostics.active_query_dumps.enabled boolean true experimental: enable dumping of anonymized active queries to disk when node is under memory pressure system-visible diagnostics.forced_sql_stat_reset.interval duration 2h0m0s interval after which the reported SQL Stats are reset even if not collected by telemetry reporter. It has a max value of 24H. application diagnostics.reporting.enabled boolean true enable reporting diagnostic metrics to cockroach labs application diagnostics.reporting.interval duration 1h0m0s interval at which diagnostics data should be reported application diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 70598afff965..d381b6fda1a7 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -35,7 +35,7 @@
cluster.auto_upgrade.enabled
booleantruedisable automatic cluster version upgrade until resetServerless/Dedicated/Self-Hosted
cluster.organization
stringorganization nameServerless/Dedicated/Self-Hosted (read-only)
cluster.preserve_downgrade_option
stringdisable (automatic or manual) cluster version upgrade from the specified version until resetServerless/Dedicated/Self-Hosted -
diagnostics.active_query_dumps.enabled
booleantrueexperimental: enable dumping of anonymized active queries to disk when node is under memory pressureDedicated/Self-Hosted +
diagnostics.active_query_dumps.enabled
booleantrueexperimental: enable dumping of anonymized active queries to disk when node is under memory pressureServerless/Dedicated/Self-Hosted (read-only)
diagnostics.forced_sql_stat_reset.interval
duration2h0m0sinterval after which the reported SQL Stats are reset even if not collected by telemetry reporter. It has a max value of 24H.Serverless/Dedicated/Self-Hosted
diagnostics.reporting.enabled
booleantrueenable reporting diagnostic metrics to cockroach labsServerless/Dedicated/Self-Hosted
diagnostics.reporting.interval
duration1h0m0sinterval at which diagnostics data should be reportedServerless/Dedicated/Self-Hosted diff --git a/pkg/server/profiler/cluster_settings.go b/pkg/server/profiler/cluster_settings.go index 83bcc08bff32..7da523e2754d 100644 --- a/pkg/server/profiler/cluster_settings.go +++ b/pkg/server/profiler/cluster_settings.go @@ -21,7 +21,7 @@ import "github.com/cockroachdb/cockroach/pkg/settings" // Note: this feature only works for nodes running on unix hosts with cgroups // enabled. var ActiveQueryDumpsEnabled = settings.RegisterBoolSetting( - settings.SystemOnly, + settings.SystemVisible, "diagnostics.active_query_dumps.enabled", "experimental: enable dumping of anonymized active queries to disk when node is under memory pressure", true, From e720de12451ad0b0e41e6faa527392461d121ca0 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 31 Oct 2023 19:04:21 -0700 Subject: [PATCH 3/3] cli: create new tracing span in mt-start-sql This commit fixes a possible misuse of the tracing infrastructure with `mt start-sql` command. In particular, previously it was possible for a tracing span created (and finished) in `runStartInternal` to be used by a couple of things in `makeTenantSQLServerArgs` (main one being `rpc.Context`) that hold on to the context after the corresponding span has been finished. This is disallowed (since we reuse spans after `Finish` has been called on them) and was recently discovered. This problem is now fixed by deriving a child span only in `mt start-sql` code path that is never finished (since those users can be active throughout the whole lifetime of the tenant server). The bug is pretty minor (I think only serverless is affected), so there is no release note. Release note: None --- pkg/cli/mt_start_sql.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkg/cli/mt_start_sql.go b/pkg/cli/mt_start_sql.go index f6c59339f42c..9e05258e976f 100644 --- a/pkg/cli/mt_start_sql.go +++ b/pkg/cli/mt_start_sql.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/server/serverctl" "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/redact" "github.com/spf13/cobra" ) @@ -63,6 +64,17 @@ func runStartSQL(cmd *cobra.Command, args []string) error { } newServerFn := func(ctx context.Context, serverCfg server.Config, stopper *stop.Stopper) (serverctl.ServerStartupInterface, error) { + // The context passed into NewSeparateProcessTenantServer will be + // captured in makeTenantSQLServerArgs by a few users that will hold on + // to it. Those users can log something into the current tracing span + // (created in runStartInternal) after the span has been Finish()'ed + // which is disallowed. To go around this issue, in this code path we + // create a new tracing span that is never finished since those users + // stay alive until the tenant server is up. + // TODO(yuzefovich): consider changing NewSeparateProcessTenantServer to + // not take in a context and - akin to NewServer - to construct its own + // from context.Background. + ctx, _ = tracing.ChildSpan(ctx, "mt-start-sql" /* opName */) // Beware of not writing simply 'return server.NewServer()'. This is // because it would cause the serverStartupInterface reference to // always be non-nil, even if NewServer returns a nil pointer (and