From ef464465aed263857ade16f337915ad56db231d2 Mon Sep 17 00:00:00 2001 From: Michael Erickson Date: Fri, 26 Aug 2022 09:08:14 -0700 Subject: [PATCH] sql/stats: add table setting to disable generation of stats forecasts Add a new storage parameter (a.k.a. table setting) which can be used to override cluster setting `sql.stats.forecasts.enabled`. This builds on work done earlier in #79025 and #86932 so we don't have to do much, just wire it all together. Fixes: #86353 Release justification: Low-risk update to new functionality. Release note (sql change): Add a new table setting `sql_stats_forecasts_enabled` which controls whether statistics forecasts are generated for a specific table. When set, this overrides cluster setting `sql.stats.forecasts.enabled`. --- pkg/sql/catalog/descriptor.go | 4 + pkg/sql/catalog/tabledesc/structured.go | 12 ++ .../opt/exec/execbuilder/testdata/forecast | 201 +++++++++++++++++- pkg/sql/stats/stats_cache.go | 3 + .../tablestorageparam/table_storage_param.go | 16 ++ 5 files changed, 234 insertions(+), 2 deletions(-) diff --git a/pkg/sql/catalog/descriptor.go b/pkg/sql/catalog/descriptor.go index b12c620cda88..b4152e643ac9 100644 --- a/pkg/sql/catalog/descriptor.go +++ b/pkg/sql/catalog/descriptor.go @@ -712,6 +712,10 @@ type TableDescriptor interface { // GetAutoStatsSettings returns the table settings related to automatic // statistics collection. May return nil if none are set. GetAutoStatsSettings() *catpb.AutoStatsSettings + // ForecastStatsEnabled indicates whether statistics forecasting is explicitly + // enabled or disabled for this table. If ok is true, then the enabled value + // is valid, otherwise this has not been set at the table level. + ForecastStatsEnabled() (enabled bool, ok bool) // GetIndexNameByID returns the name of an index based on an ID, taking into // account any ongoing declarative schema changes. Declarative schema changes // do not propagate the index name into the mutations until changes are fully diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index c994931db116..5d139b736b30 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "github.com/cockroachdb/cockroach/pkg/clusterversion" @@ -2661,6 +2662,9 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { fmt.Sprintf("%g", value)) } } + if enabled, ok := desc.ForecastStatsEnabled(); ok { + appendStorageParam(`sql_stats_forecasts_enabled`, strconv.FormatBool(enabled)) + } return storageParams } @@ -2716,6 +2720,14 @@ func (desc *wrapper) GetAutoStatsSettings() *catpb.AutoStatsSettings { return desc.AutoStatsSettings } +// ForecastStatsEnabled implements the TableDescriptor interface. +func (desc *wrapper) ForecastStatsEnabled() (enabled bool, ok bool) { + if desc.ForecastStats == nil { + return false, false + } + return *desc.ForecastStats, true +} + // SetTableLocalityRegionalByTable sets the descriptor's locality config to // regional at the table level in the supplied region. An empty region name // (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in diff --git a/pkg/sql/opt/exec/execbuilder/testdata/forecast b/pkg/sql/opt/exec/execbuilder/testdata/forecast index 17654977dd22..638c81d317c1 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/forecast +++ b/pkg/sql/opt/exec/execbuilder/testdata/forecast @@ -423,7 +423,8 @@ scan s # changes at a constant rate. statement ok -CREATE TABLE c (h TIMESTAMPTZ PRIMARY KEY) WITH (sql_stats_automatic_collection_enabled = false) +CREATE TABLE c (h TIMESTAMPTZ PRIMARY KEY) +WITH (sql_stats_automatic_collection_enabled = false, sql_stats_forecasts_enabled = true) statement ok ALTER TABLE c INJECT STATISTICS '[ @@ -735,6 +736,154 @@ WHERE stat->>'name' = '__forecast__'; } ] +# Test that we can disable forecasts for individual tables. + +statement ok +CREATE TABLE d (d DATE PRIMARY KEY) +WITH (sql_stats_automatic_collection_enabled = false, sql_stats_forecasts_enabled = false) + +statement ok +ALTER TABLE d INJECT STATISTICS '[ + { + "avg_size": 3, + "columns": [ + "d" + ], + "created_at": "1999-12-21 00:00:00.000000", + "distinct_count": 1, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "1999-12-16" + }, + { + "distinct_range": 1, + "num_eq": 0, + "num_range": 1, + "upper_bound": "1999-12-21" + } + ], + "histo_col_type": "DATE", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 1 + }, + { + "avg_size": 3, + "columns": [ + "d" + ], + "created_at": "1999-12-26 00:00:00.000000", + "distinct_count": 2, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "1999-12-16" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "1999-12-26" + } + ], + "histo_col_type": "DATE", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 2 + }, + { + "avg_size": 3, + "columns": [ + "d" + ], + "created_at": "1999-12-31 00:00:00.000000", + "distinct_count": 3, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "1999-12-16" + }, + { + "distinct_range": 3, + "num_eq": 0, + "num_range": 3, + "upper_bound": "1999-12-31" + } + ], + "histo_col_type": "DATE", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 3 + } +]' + +query TTTIIII +SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size +FROM [SHOW STATISTICS FOR TABLE d WITH FORECAST] +ORDER BY created +---- +__auto__ {d} 1999-12-21 00:00:00 +0000 +0000 1 1 0 3 +__auto__ {d} 1999-12-26 00:00:00 +0000 +0000 2 2 0 3 +__auto__ {d} 1999-12-31 00:00:00 +0000 +0000 3 3 0 3 +__forecast__ {d} 2000-01-05 00:00:00 +0000 +0000 4 4 0 3 + +query T +SELECT jsonb_pretty(stat->'histo_buckets') +FROM ( + SELECT jsonb_array_elements(statistics) AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE d WITH FORECAST] +) +WHERE stat->>'name' = '__forecast__'; +---- +[ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "1999-12-16" + }, + { + "distinct_range": 4, + "num_eq": 0, + "num_range": 4, + "upper_bound": "2000-01-05" + } +] + +query T +EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16' +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 3 (100% of the table; stats collected ago) + table: d@d_pkey + spans: [/'1999-12-16' - ] + +query T +EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16' +---- +scan d + ├── columns: d:1 + ├── constraint: /1: [/'1999-12-16' - ] + ├── stats: [rows=3, distinct(1)=3, null(1)=0] + │ histogram(1)= 0 0 3 0 + │ <--- '1999-12-16' --- '1999-12-31' + ├── cost: 17.11 + ├── key: (1) + └── distribution: test + # Test that optimizer_use_forecasts can be used to enable and disable forecasts. statement ok @@ -789,6 +938,19 @@ scan x ├── key: (1) └── distribution: test +query T +EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16' +---- +scan d + ├── columns: d:1 + ├── constraint: /1: [/'1999-12-16' - ] + ├── stats: [rows=3, distinct(1)=3, null(1)=0] + │ histogram(1)= 0 0 3 0 + │ <--- '1999-12-16' --- '1999-12-31' + ├── cost: 17.11 + ├── key: (1) + └── distribution: test + statement ok RESET optimizer_use_forecasts @@ -843,6 +1005,19 @@ scan x ├── key: (1) └── distribution: test +query T +EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16' +---- +scan d + ├── columns: d:1 + ├── constraint: /1: [/'1999-12-16' - ] + ├── stats: [rows=3, distinct(1)=3, null(1)=0] + │ histogram(1)= 0 0 3 0 + │ <--- '1999-12-16' --- '1999-12-31' + ├── cost: 17.11 + ├── key: (1) + └── distribution: test + # Test that sql.stats.forecasts.enabled can be used to enable and disable # generation of forecasts in the stats cache. @@ -878,7 +1053,7 @@ distribution: local vectorized: true · • scan - estimated row count: 0 (<0.01% of the table; stats collected ago) + estimated row count: 23 (96% of the table; stats collected ago; using stats forecast) table: c@c_pkey spans: [/'1988-08-07 00:00:00.000001+00:00' - ] @@ -893,6 +1068,17 @@ vectorized: true table: x@x_pkey spans: [/17 - ] +query T +EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16' +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 3 (100% of the table; stats collected ago) + table: d@d_pkey + spans: [/'1999-12-16' - ] + statement ok RESET CLUSTER SETTING sql.stats.forecasts.enabled @@ -939,3 +1125,14 @@ vectorized: true estimated row count: 2 (50% of the table; stats collected ago; using stats forecast) table: x@x_pkey spans: [/17 - ] + +query T +EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16' +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 3 (100% of the table; stats collected ago) + table: d@d_pkey + spans: [/'1999-12-16' - ] diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 6c815e1a1a03..8cc8e5d8f56e 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -293,6 +293,9 @@ func tableTypeCanHaveStats(table catalog.TableDescriptor) bool { // forecastAllowed returns true if statistics forecasting is allowed for the // given table. func forecastAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool { + if enabled, ok := table.ForecastStatsEnabled(); ok { + return enabled + } return UseStatisticsForecasts.Get(&clusterSettings.SV) } diff --git a/pkg/sql/storageparam/tablestorageparam/table_storage_param.go b/pkg/sql/storageparam/tablestorageparam/table_storage_param.go index b21f314efaba..383ac20f2d71 100644 --- a/pkg/sql/storageparam/tablestorageparam/table_storage_param.go +++ b/pkg/sql/storageparam/tablestorageparam/table_storage_param.go @@ -449,6 +449,22 @@ var tableParams = map[string]tableParam{ onSet: autoStatsFractionStaleRowsSettingFunc(settings.NonNegativeFloat), onReset: autoStatsTableSettingResetFunc, }, + `sql_stats_forecasts_enabled`: { + onSet: func( + po *Setter, semaCtx *tree.SemaContext, evalCtx *eval.Context, key string, datum tree.Datum, + ) error { + enabled, err := boolFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + po.tableDesc.ForecastStats = &enabled + return nil + }, + onReset: func(po *Setter, evalCtx *eval.Context, key string) error { + po.tableDesc.ForecastStats = nil + return nil + }, + }, } func init() {