Skip to content

Commit

Permalink
sql/opt: add session variable to disable stats forecast use in optimizer
Browse files Browse the repository at this point in the history
Add a new session variable, `optimizer_use_forecasts`, which can be set
to false to stop the optimizer from using statistics forecasts. Forecasts
will still be generated in the stats cache (forecast generation will be
controlled by a different variable).

Assists: cockroachdb#86350

Release justification: Low-risk update to new functionality.

Release note (sql change): This commit adds a new session variable,
`optimizer_use_forecasts`, which can be set to false to disable usage of
statistics forecasts when optimizing a query.
  • Loading branch information
michae2 committed Aug 25, 2022
1 parent 54bc65f commit 67f9187
Show file tree
Hide file tree
Showing 16 changed files with 184 additions and 19 deletions.
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3048,6 +3048,10 @@ func (m *sessionDataMutator) SetOptimizerFKCascadesLimit(val int) {
m.data.OptimizerFKCascadesLimit = int64(val)
}

// SetOptimizerUseForecasts stores val in the session data's
// OptimizerUseForecasts field, which indicates whether the optimizer should
// use statistics forecasts for cardinality estimation.
func (m *sessionDataMutator) SetOptimizerUseForecasts(val bool) {
m.data.OptimizerUseForecasts = val
}

// SetOptimizerUseHistograms stores val in the session data's
// OptimizerUseHistograms field.
func (m *sessionDataMutator) SetOptimizerUseHistograms(val bool) {
m.data.OptimizerUseHistograms = val
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/explain_bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ func (c *stmtEnvCollector) PrintSessionSettings(w io.Writer) error {
{sessionSetting: "null_ordered_last"},
{sessionSetting: "on_update_rehome_row_enabled", clusterSetting: onUpdateRehomeRowEnabledClusterMode, convFunc: boolToOnOff},
{sessionSetting: "opt_split_scan_limit"},
{sessionSetting: "optimizer_use_forecasts", convFunc: boolToOnOff},
{sessionSetting: "optimizer_use_histograms", clusterSetting: optUseHistogramsClusterMode, convFunc: boolToOnOff},
{sessionSetting: "optimizer_use_multicol_stats", clusterSetting: optUseMultiColStatsClusterMode, convFunc: boolToOnOff},
{sessionSetting: "optimizer_use_not_visible_indexes"},
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,12 @@ SELECT start_pretty, end_pretty FROM crdb_internal.ranges
WHERE split_enforced_until IS NOT NULL
AND (start_pretty LIKE '/Table/112/1%' OR start_pretty LIKE '/Table/112/2%')
----
/Table/112/1/1 /Table/112/1/2
/Table/112/1/2 /Table/112/1/3
/Table/112/1/3 /Table/112/2/1
/Table/112/2/1 /Table/112/2/2
/Table/112/2/2 /Table/112/2/3
/Table/112/2/3 /Table/112/3/1

statement ok
ALTER TABLE foo SPLIT AT VALUES (1), (2), (3)
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4743,6 +4743,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -4223,6 +4223,7 @@ node_id 1 NULL
null_ordered_last off NULL NULL NULL string
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_multicol_stats on NULL NULL NULL string
optimizer_use_not_visible_indexes off NULL NULL NULL string
Expand Down Expand Up @@ -4350,6 +4351,7 @@ node_id 1 NULL
null_ordered_last off NULL user NULL off off
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_multicol_stats on NULL user NULL on on
optimizer_use_not_visible_indexes off NULL user NULL off off
Expand Down Expand Up @@ -4475,6 +4477,7 @@ null_ordered_last NULL NULL NULL
on_update_rehome_row_enabled NULL NULL NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_multicol_stats NULL NULL NULL NULL NULL
optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/schema
Original file line number Diff line number Diff line change
Expand Up @@ -885,4 +885,3 @@ DROP SCHEMA sc

statement ok
SET CLUSTER SETTING server.eventlog.enabled = false

1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ node_id 1
null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
Expand Down
26 changes: 13 additions & 13 deletions pkg/sql/opt/exec/execbuilder/testdata/explain_env

Large diffs are not rendered by default.

108 changes: 108 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/forecast
Original file line number Diff line number Diff line change
Expand Up @@ -734,3 +734,111 @@ WHERE stat->>'name' = '__forecast__';
"upper_bound": "19"
}
]

# Test that optimizer_use_forecasts can be used to enable and disable forecasts.

statement ok
SET optimizer_use_forecasts = off

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM g WHERE a > 8
----
scan g
├── columns: a:1
├── constraint: /1: [/9 - ]
├── stats: [rows=1.8e-09, distinct(1)=1.8e-09, null(1)=0]
│ histogram(1)=
├── cost: 14.02
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM s WHERE b < 3
----
scan s
├── columns: b:1
├── constraint: /1: [ - /2]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 1 0 1 0 1
│ <--- 0 --- 1 --- 2
├── cost: 17.05
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM c WHERE h > '1988-08-07'
----
scan c
├── columns: h:1
├── constraint: /1: [/'1988-08-07 00:00:00.000001+00:00' - ]
├── stats: [rows=4.8e-09, distinct(1)=4.8e-09, null(1)=0]
│ histogram(1)=
├── cost: 14.02
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM x WHERE a > 16
----
scan x
├── columns: a:1
├── constraint: /1: [/17 - ]
├── stats: [rows=8e-10, distinct(1)=8e-10, null(1)=0]
│ histogram(1)=
├── cost: 14.02
├── key: (1)
└── distribution: test

statement ok
RESET optimizer_use_forecasts

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM g WHERE a > 8
----
scan g
├── columns: a:1
├── constraint: /1: [/9 - ]
├── stats: [rows=2.666667, distinct(1)=2.33333, null(1)=0]
│ histogram(1)= 0 0.66667 0 0.66667 0 1.3333
│ <----- 9 ------ 10 ------ 11 -
├── cost: 16.7133333
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM s WHERE b < 3
----
scan s
├── columns: b:1
├── constraint: /1: [ - /2]
├── stats: [rows=1, distinct(1)=1, null(1)=0]
│ histogram(1)=
├── cost: 15.03
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM c WHERE h > '1988-08-07'
----
scan c
├── columns: h:1
├── constraint: /1: [/'1988-08-07 00:00:00.000001+00:00' - ]
├── stats: [rows=23, distinct(1)=23, null(1)=0]
│ histogram(1)= 0 0 5 1 5 1 5 1 5 0
│ <--- '1988-08-07 00:00:00+00:00' --- '1988-08-07 06:00:00+00:00' --- '1988-08-07 12:00:00+00:00' --- '1988-08-07 18:00:00+00:00' --- '1988-08-08 00:00:00+00:00'
├── cost: 38.63
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM x WHERE a > 16
----
scan x
├── columns: a:1
├── constraint: /1: [/17 - ]
├── stats: [rows=2, distinct(1)=2, null(1)=0]
│ histogram(1)= 0 0 2 0
│ <--- 16 --- 19
├── cost: 16.04
├── key: (1)
└── distribution: test
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ type Memo struct {
// fields in explain_bundle.go.
reorderJoinsLimit int
zigzagJoinEnabled bool
useForecasts bool
useHistograms bool
useMultiColStats bool
useNotVisibleIndex bool
Expand Down Expand Up @@ -187,6 +188,7 @@ func (m *Memo) Init(evalCtx *eval.Context) {
metadata: m.metadata,
reorderJoinsLimit: int(evalCtx.SessionData().ReorderJoinsLimit),
zigzagJoinEnabled: evalCtx.SessionData().ZigzagJoinEnabled,
useForecasts: evalCtx.SessionData().OptimizerUseForecasts,
useHistograms: evalCtx.SessionData().OptimizerUseHistograms,
useMultiColStats: evalCtx.SessionData().OptimizerUseMultiColStats,
useNotVisibleIndex: evalCtx.SessionData().OptimizerUseNotVisibleIndexes,
Expand Down Expand Up @@ -321,6 +323,7 @@ func (m *Memo) IsStale(
// changed.
if m.reorderJoinsLimit != int(evalCtx.SessionData().ReorderJoinsLimit) ||
m.zigzagJoinEnabled != evalCtx.SessionData().ZigzagJoinEnabled ||
m.useForecasts != evalCtx.SessionData().OptimizerUseForecasts ||
m.useHistograms != evalCtx.SessionData().OptimizerUseHistograms ||
m.useMultiColStats != evalCtx.SessionData().OptimizerUseMultiColStats ||
m.useNotVisibleIndex != evalCtx.SessionData().OptimizerUseNotVisibleIndexes ||
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().ZigzagJoinEnabled = false
notStale()

// Stale optimizer forecast usage enable.
evalCtx.SessionData().OptimizerUseForecasts = true
stale()
evalCtx.SessionData().OptimizerUseForecasts = false
notStale()

// Stale optimizer histogram usage enable.
evalCtx.SessionData().OptimizerUseHistograms = true
stale()
Expand Down
21 changes: 16 additions & 5 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -610,24 +610,35 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati

// Make now and annotate the metadata table with it for next time.
stats = &props.Statistics{}
if tab.StatisticCount() == 0 {

// Find the most recent statistic. (Stats are ordered with most recent first.)
var first int
if !sb.evalCtx.SessionData().OptimizerUseForecasts {
for first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
first++
}
}

if first >= tab.StatisticCount() {
// No statistics.
stats.Available = false
stats.RowCount = unknownRowCount
} else {
// Get the RowCount from the most recent statistic. Stats are ordered
// with most recent first.
// Use the RowCount from the most recent statistic.
stats.Available = true
stats.RowCount = float64(tab.Statistic(0).RowCount())
stats.RowCount = float64(tab.Statistic(first).RowCount())

// Make sure the row count is at least 1. The stats may be stale, and we
// can end up with weird and inefficient plans if we estimate 0 rows.
stats.RowCount = max(stats.RowCount, 1)

// Add all the column statistics, using the most recent statistic for each
// column set. Stats are ordered with most recent first.
for i := 0; i < tab.StatisticCount(); i++ {
for i := first; i < tab.StatisticCount(); i++ {
stat := tab.Statistic(i)
if stat.IsForecast() && !sb.evalCtx.SessionData().OptimizerUseForecasts {
continue
}
if stat.ColumnCount() > 1 && !sb.evalCtx.SessionData().OptimizerUseMultiColStats {
continue
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/optbuilder/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ func TestBuilder(t *testing.T) {
ctx := context.Background()
semaCtx := tree.MakeSemaContext()
evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
evalCtx.SessionData().OptimizerUseForecasts = true
evalCtx.SessionData().OptimizerUseHistograms = true
evalCtx.SessionData().OptimizerUseMultiColStats = true
evalCtx.SessionData().LocalityOptimizedSearch = true
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/testutils/opttester/opt_tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ func New(catalog cat.Catalog, sql string) *OptTester {
ot.evalCtx.SessionData().UserProto = username.MakeSQLUsernameFromPreNormalizedString("opttester").EncodeProto()
ot.evalCtx.SessionData().Database = "defaultdb"
ot.evalCtx.SessionData().ZigzagJoinEnabled = true
ot.evalCtx.SessionData().OptimizerUseForecasts = true
ot.evalCtx.SessionData().OptimizerUseHistograms = true
ot.evalCtx.SessionData().LocalityOptimizedSearch = true
ot.evalCtx.SessionData().ReorderJoinsLimit = opt.DefaultJoinOrderLimit
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/sessiondatapb/local_only_session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ message LocalOnlySessionData {
// disable_hoist_projection_in_join_limitation disables the restrictions
// placed on projection hoisting during query planning in the optimizer.
bool disable_hoist_projection_in_join_limitation = 76;
// OptimizerUseForecasts indicates whether we should use statistics forecasts
// for cardinality estimation in the optimizer.
bool optimizer_use_forecasts = 77;

///////////////////////////////////////////////////////////////////////////
// WARNING: consider whether a session parameter you're adding needs to //
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,23 @@ var varGen = map[string]sessionVar{
},
},

// CockroachDB extension.
// optimizer_use_forecasts is a boolean session variable backed by the
// OptimizerUseForecasts field of the session data.
`optimizer_use_forecasts`: {
GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_forecasts`),
// Set parses s as a boolean and records it through the session data
// mutator. If parsing fails, the error is returned and the session value
// is left untouched.
Set: func(_ context.Context, m sessionDataMutator, s string) error {
b, err := paramparse.ParseBoolVar("optimizer_use_forecasts", s)
if err != nil {
return err
}
m.SetOptimizerUseForecasts(b)
return nil
},
// Get reports the session's current OptimizerUseForecasts value, rendered
// by formatBoolAsPostgresSetting.
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseForecasts), nil
},
// NOTE(review): globalTrue presumably makes the variable default to
// enabled for new sessions — confirm against its definition.
GlobalDefault: globalTrue,
},

// CockroachDB extension.
`optimizer_use_histograms`: {
GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_histograms`),
Expand Down

0 comments on commit 67f9187

Please sign in to comment.