diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 3ec751b2b80b..863c2cf2c793 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -264,6 +264,7 @@ sql.stats.automatic_collection.min_stale_rows integer 500 target minimum number sql.stats.cleanup.recurrence string @hourly cron-tab recurrence for SQL Stats cleanup job sql.stats.flush.enabled boolean true if set, SQL execution statistics are periodically flushed to disk sql.stats.flush.interval duration 10m0s the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval +sql.stats.forecasts.enabled boolean true when true, enables generation of statistics forecasts sql.stats.histogram_collection.enabled boolean true histogram collection mode sql.stats.multi_column_collection.enabled boolean true multi-column statistics collection mode sql.stats.non_default_columns.min_retention_period duration 24h0m0s minimum retention period for table statistics collected on non-default columns diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 4a6ae09aa22f..52df88ee03c3 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -198,6 +198,7 @@ sql.stats.cleanup.recurrencestring@hourlycron-tab recurrence for SQL Stats cleanup job sql.stats.flush.enabledbooleantrueif set, SQL execution statistics are periodically flushed to disk sql.stats.flush.intervalduration10m0sthe interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval +sql.stats.forecasts.enabledbooleantruewhen true, enables generation of statistics forecasts sql.stats.histogram_collection.enabledbooleantruehistogram collection mode sql.stats.multi_column_collection.enabledbooleantruemulti-column statistics collection 
mode sql.stats.non_default_columns.min_retention_periodduration24h0m0sminimum retention period for table statistics collected on non-default columns diff --git a/pkg/sql/opt/exec/execbuilder/testdata/forecast b/pkg/sql/opt/exec/execbuilder/testdata/forecast index 56427ea9f783..17654977dd22 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/forecast +++ b/pkg/sql/opt/exec/execbuilder/testdata/forecast @@ -842,3 +842,100 @@ scan x ├── cost: 16.04 ├── key: (1) └── distribution: test + +# Test that sql.stats.forecasts.enabled can be used to enable and disable +# generation of forecasts in the stats cache. + +statement ok +SET CLUSTER SETTING sql.stats.forecasts.enabled = false + +query T +EXPLAIN SELECT * FROM g WHERE a > 8 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 0 (<0.01% of the table; stats collected ago) + table: g@g_pkey + spans: [/9 - ] + +query T +EXPLAIN SELECT * FROM s WHERE b < 3 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 3 (100% of the table; stats collected ago) + table: s@s_pkey + spans: [ - /2] + +query T +EXPLAIN SELECT * FROM c WHERE h > '1988-08-07' +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 0 (<0.01% of the table; stats collected ago) + table: c@c_pkey + spans: [/'1988-08-07 00:00:00.000001+00:00' - ] + +query T +EXPLAIN SELECT * FROM x WHERE a > 16 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 0 (<0.01% of the table; stats collected ago) + table: x@x_pkey + spans: [/17 - ] + +statement ok +RESET CLUSTER SETTING sql.stats.forecasts.enabled + +query T +EXPLAIN SELECT * FROM g WHERE a > 8 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 3 (22% of the table; stats collected ago; using stats forecast) + table: g@g_pkey + spans: [/9 - ] + +query T +EXPLAIN SELECT * FROM s WHERE b < 3 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 1 (100% of the table; 
stats collected ago; using stats forecast) + table: s@s_pkey + spans: [ - /2] + +query T +EXPLAIN SELECT * FROM c WHERE h > '1988-08-07' +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 23 (96% of the table; stats collected ago; using stats forecast) + table: c@c_pkey + spans: [/'1988-08-07 00:00:00.000001+00:00' - ] + +query T +EXPLAIN SELECT * FROM x WHERE a > 16 +---- +distribution: local +vectorized: true +· +• scan + estimated row count: 2 (50% of the table; stats collected ago; using stats forecast) + table: x@x_pkey + spans: [/17 - ] diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 3ef24c81ac10..de6576f458b1 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -467,7 +467,10 @@ func (r *Refresher) Start( explicitSettings = &settings } } - r.maybeRefreshStats(ctx, tableID, explicitSettings, rowsAffected, r.asOfTime) + forecast := forecastAllowed(desc, r.st) + r.maybeRefreshStats( + ctx, tableID, explicitSettings, forecast, rowsAffected, r.asOfTime, + ) select { case <-stopper.ShouldQuiesce(): @@ -675,10 +678,11 @@ func (r *Refresher) maybeRefreshStats( ctx context.Context, tableID descpb.ID, explicitSettings *catpb.AutoStatsSettings, + forecast bool, rowsAffected int64, asOf time.Duration, ) { - tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID) + tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, forecast) if err != nil { log.Errorf(ctx, "failed to get table statistics: %v", err) return diff --git a/pkg/sql/stats/forecast.go b/pkg/sql/stats/forecast.go index 08670e78ab23..6362a57df77a 100644 --- a/pkg/sql/stats/forecast.go +++ b/pkg/sql/stats/forecast.go @@ -16,6 +16,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" 
"github.com/cockroachdb/cockroach/pkg/sql/types" @@ -24,6 +25,15 @@ import ( "github.com/cockroachdb/redact" ) +// UseStatisticsForecasts controls whether statistics forecasts are generated in +// the stats cache. +var UseStatisticsForecasts = settings.RegisterBoolSetting( + settings.TenantWritable, + "sql.stats.forecasts.enabled", + "when true, enables generation of statistics forecasts", + true, +).WithPublic() + // minObservationsForForecast is the minimum number of observed statistics // required to produce a statistics forecast. Forecasts based on 1 or 2 // observations will always have R² = 1 (perfect goodness of fit) regardless of diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 9a273b90e937..42ed6d826a79 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -102,6 +102,9 @@ type cacheEntry struct { // timestamp was moved, it will trigger another refresh. refreshing bool + // forecast is true if stats could contain forecasts. + forecast bool + stats []*TableStatistic // err is populated if the internal query to retrieve stats hit an error. @@ -217,7 +220,7 @@ func (sc *TableStatisticsCache) GetTableStats( if !statsUsageAllowed(table, sc.Settings) { return nil, nil } - return sc.getTableStatsFromCache(ctx, table.GetID()) + return sc.getTableStatsFromCache(ctx, table.GetID(), forecastAllowed(table, sc.Settings)) } func statsDisallowedSystemTable(tableID descpb.ID) bool { @@ -286,19 +289,31 @@ func tableTypeCanHaveStats(table catalog.TableDescriptor) bool { return true } +// forecastAllowed returns true if statistics forecasting is allowed for the +// given table. +func forecastAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool { + return UseStatisticsForecasts.Get(&clusterSettings.SV) +} + // getTableStatsFromCache is like GetTableStats but assumes that the table ID // is safe to fetch statistics for: non-system, non-virtual, non-view, etc. 
func (sc *TableStatisticsCache) getTableStatsFromCache( - ctx context.Context, tableID descpb.ID, + ctx context.Context, tableID descpb.ID, forecast bool, ) ([]*TableStatistic, error) { sc.mu.Lock() defer sc.mu.Unlock() if found, e := sc.lookupStatsLocked(ctx, tableID, false /* stealthy */); found { - return e.stats, e.err + if e.forecast != forecast { + // Forecasting was recently enabled or disabled on this table. Evict the + // cache entry and build it again. + sc.mu.cache.Del(tableID) + } else { + return e.stats, e.err + } } - return sc.addCacheEntryLocked(ctx, tableID) + return sc.addCacheEntryLocked(ctx, tableID, forecast) } // lookupStatsLocked retrieves any existing stats for the given table. @@ -351,7 +366,7 @@ func (sc *TableStatisticsCache) lookupStatsLocked( // - mutex is locked again and the entry is updated. // func (sc *TableStatisticsCache) addCacheEntryLocked( - ctx context.Context, tableID descpb.ID, + ctx context.Context, tableID descpb.ID, forecast bool, ) (stats []*TableStatistic, err error) { // Add a cache entry that other queries can find and wait on until we have the // stats. @@ -367,12 +382,12 @@ func (sc *TableStatisticsCache) addCacheEntryLocked( defer sc.mu.Lock() log.VEventf(ctx, 1, "reading statistics for table %d", tableID) - stats, err = sc.getTableStatsFromDB(ctx, tableID) + stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast) log.VEventf(ctx, 1, "finished reading statistics for table %d", tableID) }() e.mustWait = false - e.stats, e.err = stats, err + e.forecast, e.stats, e.err = forecast, stats, err // Wake up any other callers that are waiting on these stats. 
e.waitCond.Broadcast() @@ -422,6 +437,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry( } e.refreshing = true + forecast := e.forecast var stats []*TableStatistic var err error for { @@ -432,7 +448,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry( log.VEventf(ctx, 1, "refreshing statistics for table %d", tableID) // TODO(radu): pass the timestamp and use AS OF SYSTEM TIME. - stats, err = sc.getTableStatsFromDB(ctx, tableID) + stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast) log.VEventf(ctx, 1, "done refreshing statistics for table %d", tableID) }() if e.lastRefreshTimestamp.Equal(ts) { @@ -678,7 +694,7 @@ func (tabStat *TableStatistic) String() string { // It ignores any statistics that cannot be decoded (e.g. because a user-defined // type that doesn't exist) and returns the rest (with no error). func (sc *TableStatisticsCache) getTableStatsFromDB( - ctx context.Context, tableID descpb.ID, + ctx context.Context, tableID descpb.ID, forecast bool, ) ([]*TableStatistic, error) { const getTableStatisticsStmt = ` SELECT @@ -720,8 +736,10 @@ ORDER BY "createdAt" DESC, "columnIDs" DESC, "statisticID" DESC return nil, err } - forecasts := ForecastTableStatistics(ctx, statsList) - statsList = append(forecasts, statsList...) + if forecast { + forecasts := ForecastTableStatistics(ctx, statsList) + statsList = append(forecasts, statsList...) + } return statsList, nil }