
[stats] stats table name sensitivity tests #8684

Merged
merged 13 commits
Dec 23, 2024
4 changes: 2 additions & 2 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
@@ -1663,9 +1663,9 @@ func TestStatsHistograms(t *testing.T) {

// TestStatsIO forces a provider reload in-between setup and assertions that
// forces a round trip of the statistics table before inspecting values.
func TestStatsIO(t *testing.T) {
func TestStatsStorage(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunStatsIOTests(t, h)
RunStatsStorageTests(t, h)
}
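
As an aside (the sketch below is not part of the PR): the comment above describes the intent the renamed TestStatsStorage keeps, statistics are written during setup, the in-memory provider state is discarded, and assertions run against whatever comes back from storage. A minimal stdlib-only sketch of that write, drop, reload, assert round trip, with statsStore and provider as illustrative stand-ins rather than Dolt types:

    package main

    import "fmt"

    // statsStore stands in for the persisted statistics table.
    type statsStore map[string]int

    // provider stands in for the in-memory statistics provider.
    type provider struct{ cache map[string]int }

    // flush persists the in-memory stats.
    func (p *provider) flush(s statsStore) {
        for k, v := range p.cache {
            s[k] = v
        }
    }

    // reload rebuilds the in-memory state from storage.
    func (p *provider) reload(s statsStore) {
        p.cache = map[string]int{}
        for k, v := range s {
            p.cache[k] = v
        }
    }

    func main() {
        store := statsStore{}
        p := &provider{cache: map[string]int{"xy.primary": 3}}

        p.flush(store)  // setup writes statistics
        p = &provider{} // discard in-memory state
        p.reload(store) // force a round trip through storage

        fmt.Println(p.cache["xy.primary"]) // assertions see the reloaded value: 3
    }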

func TestStatsIOWithoutReload(t *testing.T) {
6 changes: 3 additions & 3 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
@@ -1553,8 +1553,8 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) {
}
}

func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
@@ -1569,7 +1569,7 @@ func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
}

func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
79 changes: 75 additions & 4 deletions go/libraries/doltcore/sqle/enginetest/stats_queries.go
@@ -295,7 +295,7 @@ var DoltHistogramTests = []queries.ScriptTest{
},
}

var DoltStatsIOTests = []queries.ScriptTest{
var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "single-table",
SetUpScript: []string{
@@ -569,6 +569,73 @@ var DoltStatsIOTests = []queries.ScriptTest{
},
},
},
{
Name: "differentiate table cases",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table XY (x bigint primary key, y varchar(16))",
"insert into XY values (0,'0'), (1,'1'), (2,'2')",
"analyze table XY",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
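
A note outside the diff: this script pins down that dolt_statistics reports the lower-cased name xy for a table created as XY, so statistics keep working when the declared table case and the stored metadata case differ. A stdlib-only sketch of the normalize-on-write, look-up-case-insensitively idea it relies on; recordStat and lookupStat are illustrative helpers, not Dolt APIs:

    package main

    import (
        "fmt"
        "strings"
    )

    // stats is keyed by lower-cased table name, mirroring how
    // dolt_statistics reports "xy" for a table created as "XY".
    var stats = map[string]string{}

    func recordStat(table, upperBound string) {
        stats[strings.ToLower(table)] = upperBound
    }

    func lookupStat(table string) (string, bool) {
        ub, ok := stats[strings.ToLower(table)]
        return ub, ok
    }

    func main() {
        recordStat("XY", "2")
        ub, ok := lookupStat("xy") // lookups succeed regardless of case
        fmt.Println(ub, ok)        // 2 true
    }
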
{
Name: "deleted table loads OK",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"CREATE table uv (u bigint primary key, v varchar(16))",
"insert into uv values (0,'0'), (1,'1'), (2,'2')",
"analyze table uv",
"drop table uv",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "differentiate branch names",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main,feat'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"call dolt_checkout('-b', 'feat')",
"CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
"insert into xy values (3,'3',3)",
"analyze table xy",
"call dolt_checkout('main')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
{
Query: "call dolt_checkout('feat')",
},
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "3"}},
},
},
},
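
A note outside the diff: this script creates xy with different schemas on main and feat and expects each branch to report its own upper_bound, so statistics are effectively keyed by branch and table rather than by table alone. A stdlib-only sketch of that keying; statKey is an illustrative type, not a Dolt one:

    package main

    import "fmt"

    // statKey separates statistics per branch, so "xy" on main and
    // "xy" on feat never overwrite each other.
    type statKey struct {
        branch string
        table  string
    }

    func main() {
        upperBounds := map[statKey]string{
            {branch: "main", table: "xy"}: "2",
            {branch: "feat", table: "xy"}: "3",
        }
        fmt.Println(upperBounds[statKey{"main", "xy"}]) // 2
        fmt.Println(upperBounds[statKey{"feat", "xy"}]) // 3
    }
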
{
Name: "drop primary key",
SetUpScript: []string{
@@ -963,11 +1030,15 @@ func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine,
t.Errorf("expected *gms.Engine but found: %T", e)
}

branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
brCopy := make([]string, len(branches))
copy(brCopy, branches)
err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
require.NoError(t, err)

err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", "main")
require.NoError(t, err)
for _, branch := range brCopy {
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
require.NoError(t, err)
}
}

for _, assertion := range assertions {
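
One detail in the TestProviderReloadScriptWithEngine hunk above that is easy to miss: TrackedBranches is copied into brCopy before DropDbStats runs, presumably because dropping the database's stats can clobber the slice the provider handed back, and the reload loop then iterates the copy. The pattern, reduced to a stdlib-only sketch (tracker and dropAll are illustrative stand-ins, not Dolt APIs):

    package main

    import "fmt"

    // tracker stands in for the stats provider's per-database branch list.
    type tracker struct{ branches []string }

    // dropAll clears tracked state in place, overwriting the backing
    // array that any previously returned slice still points at.
    func (t *tracker) dropAll() {
        for i := range t.branches {
            t.branches[i] = ""
        }
        t.branches = t.branches[:0]
    }

    func main() {
        tr := &tracker{branches: []string{"main", "feat"}}
        branches := tr.branches // the live internal slice, as an accessor might return it

        // Snapshot before the destructive call, then iterate the copy.
        brCopy := make([]string, len(branches))
        copy(brCopy, branches)

        tr.dropAll()

        fmt.Println(branches) // [ ]: the live slice was clobbered in place
        for _, br := range brCopy {
            fmt.Println("reload stats for branch:", br) // main, then feat
        }
    }
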
35 changes: 25 additions & 10 deletions go/libraries/doltcore/sqle/statsnoms/database.go
@@ -141,7 +141,20 @@ func (n *NomsStatsDatabase) Branches() []string {
}

func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
if ok, err := n.SchemaChange(ctx, branch); err != nil {
branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)

dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
return nil
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}

if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
return err
} else if ok {
ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
@@ -164,7 +177,7 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return n.trackBranch(ctx, branch)
}

doltStats, err := loadStats(ctx, n.sourceDb, statsMap)
doltStats, err := loadStats(ctx, branchQDb, statsMap)
if err != nil {
return err
}
@@ -176,12 +189,12 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return nil
}
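
A note outside the diff: LoadBranchStats now resolves a branch-qualified database through the session provider before comparing schemas, and a branch that no longer exists is downgraded from a hard error to a debug log plus an early return, with dolt_stats_prune() suggested for cleanup. The sketch below assumes the branch-qualified name has a database/branch shape, which statspro.BranchQualifiedDatabase appears to produce; resolveDb and loadBranchStats are illustrative stand-ins, not the real signatures:

    package main

    import (
        "fmt"
        "log"
    )

    // resolveDb stands in for the session's database provider.
    func resolveDb(name string) (string, error) {
        known := map[string]bool{"mydb/main": true}
        if !known[name] {
            return "", fmt.Errorf("database not found: %s", name)
        }
        return name, nil
    }

    func loadBranchStats(dbName, branch string) error {
        qualified := dbName + "/" + branch // assumed branch-qualified database name
        if _, err := resolveDb(qualified); err != nil {
            // A deleted branch leaves stale stats behind; skip it instead of failing.
            log.Printf("statistics load: branch not found: %s; call dolt_stats_prune() to delete stale statistics", branch)
            return nil
        }
        // ... the schema-change check and the actual stats load would follow here.
        return nil
    }

    func main() {
        fmt.Println(loadBranchStats("mydb", "main")) // <nil>
        fmt.Println(loadBranchStats("mydb", "gone")) // <nil>, after a debug-style log line
    }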

func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool, error) {
root, err := n.sourceDb.GetRoot(ctx)
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
root, err := branchQDb.GetRoot(ctx)
if err != nil {
return false, err
}
tables, err := n.sourceDb.GetTableNames(ctx)
tables, err := branchQDb.GetTableNames(ctx)
if err != nil {
return false, err
}
@@ -201,7 +214,7 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
return false, err
}

keys = append(keys, branch+"/"+tableName)
keys = append(keys, n.schemaTupleKey(branch, tableName))
schHashes = append(schHashes, curHash)
}

@@ -217,8 +230,6 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
schemaChange = true
break
}
} else if err != nil {
return false, err
}
}
if schemaChange {
@@ -438,7 +449,7 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
if strings.EqualFold(branch, b) {
return n.schemaHashes[i][tableName], nil
}
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, branch+"/"+tableName); ok {
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
if err != nil {
return hash.Hash{}, err
}
@@ -453,6 +464,10 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
return hash.Hash{}, nil
}

func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
return n.sourceDb.Name() + "/" + branch + "/" + tableName
}
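
A note outside the diff: schemaTupleKey now prefixes keys with the source database name, so schema hashes for the same branch and table name in different databases can no longer collide in the stats store. The key shape in isolation, with schemaTupleKey re-declared here purely for illustration:

    package main

    import "fmt"

    // schemaTupleKey mirrors the db/branch/table shape used above so schema
    // hashes from different databases and branches stay distinct.
    func schemaTupleKey(db, branch, table string) string {
        return db + "/" + branch + "/" + table
    }

    func main() {
        fmt.Println(schemaTupleKey("mydb", "main", "xy"))    // mydb/main/xy
        fmt.Println(schemaTupleKey("mydb", "feat", "xy"))    // mydb/feat/xy
        fmt.Println(schemaTupleKey("otherdb", "main", "xy")) // otherdb/main/xy
    }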

func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
n.mu.Lock()
defer n.mu.Unlock()
@@ -471,7 +486,7 @@ func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName
}

n.schemaHashes[branchIdx][tableName] = h
key := branch + "/" + tableName
key := n.schemaTupleKey(branch, tableName)
if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
return err
}
99 changes: 63 additions & 36 deletions go/libraries/doltcore/sqle/statsnoms/load.go
@@ -45,6 +45,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
return nil, err
}
currentStat := statspro.NewDoltStats()
invalidTables := make(map[string]bool)
for {
row, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
@@ -74,27 +75,31 @@
}

qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
if _, ok := invalidTables[tableName]; ok {
continue
}

if currentStat.Statistic.Qual.String() != qual.String() {
if !currentStat.Statistic.Qual.Empty() {
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}

currentStat = statspro.NewDoltStats()
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {

tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
if ok {
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
} else if !ok {
ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
invalidTables[qual.Table()] = true
continue
} else if err != nil {
return nil, err
}
}
@@ -168,18 +173,10 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat.Statistic.Created = createdAt
}
}
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
if !currentStat.Qualifier().Empty() {
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
return qualToStats, nil
}
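
A note outside the diff: the loadStats changes above introduce an invalidTables set. When a persisted statistics row references a table that is missing from the current root (the "deleted table loads OK" script), the row is logged and skipped instead of failing the whole load, and any further rows for that table are skipped without another lookup. A stdlib-only reduction of that control flow; statRow, tableExists, and this loadStats are illustrative, not the real signatures:

    package main

    import (
        "fmt"
        "log"
    )

    // statRow is a stand-in for one persisted statistics row.
    type statRow struct{ table, index string }

    func loadStats(rows []statRow, tableExists func(string) bool) map[string]int {
        loaded := map[string]int{}
        invalidTables := map[string]bool{}
        for _, r := range rows {
            if invalidTables[r.table] {
                continue // already known to be missing; skip without another lookup
            }
            if !tableExists(r.table) {
                log.Printf("stats load: table previously collected is missing from root: %s", r.table)
                invalidTables[r.table] = true
                continue
            }
            loaded[r.table]++ // count loaded index stats per table
        }
        return loaded
    }

    func main() {
        rows := []statRow{{"xy", "primary"}, {"uv", "primary"}, {"uv", "v_idx"}}
        exists := func(t string) bool { return t == "xy" } // uv was dropped
        fmt.Println(loadStats(rows, exists)) // map[xy:1]
    }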

@@ -195,14 +192,44 @@ func parseTypeStrings(typs []string) ([]sql.Type, error) {
return ret, nil
}

func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, error) {
func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
root, err := db.GetRoot(ctx)
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}

iat, ok := sqlTable.(sql.IndexAddressable)
if !ok {
return nil, nil, nil, sql.ColSet{}, nil
}

indexes, err := iat.GetIndexes(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}

var sqlIdx sql.Index
for _, i := range indexes {
if strings.EqualFold(i.ID(), qual.Index()) {
sqlIdx = i
break
}
}

if sqlIdx == nil {
return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
}

fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
if !ok {
return nil, nil, sql.ErrTableNotFound.New(qual.Table())
return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}

var idx durable.Index
@@ -212,25 +239,25 @@ func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, q
idx, err = table.GetIndexRowData(ctx, qual.Index())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}

prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
buffPool := prollyMap.NodeStore().Pool()

if cnt, err := prollyMap.Count(); err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
} else if cnt == 0 {
return nil, keyBuilder, nil
return nil, keyBuilder, nil, sql.ColSet{}, nil
}
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
keyBytes, _, err := firstIter.Next(ctx)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
@@ -241,10 +268,10 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
}
return firstRow, keyBuilder, nil
return firstRow, keyBuilder, fds, colset, nil
}
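
A note outside the diff: loadRefdProps folds the old loadLowerBound and loadFuncDeps work into a single pass. It matches qual.Index() against the table's indexes case-insensitively, derives the functional dependencies and column set from that index via stats.IndexFds, and then reads the first key of the prolly map as the lower bound. The case-insensitive index match on its own, as a stdlib-only sketch (indexByID is illustrative; the real code works on sql.Index values):

    package main

    import (
        "fmt"
        "strings"
    )

    // indexByID finds an index by ID while ignoring case, mirroring how
    // loadRefdProps matches qual.Index() against the table's indexes.
    func indexByID(ids []string, want string) (string, error) {
        for _, id := range ids {
            if strings.EqualFold(id, want) {
                return id, nil
            }
        }
        return "", fmt.Errorf("failed to load stats: index not found: '%s'", want)
    }

    func main() {
        ids := []string{"PRIMARY", "y_idx"}
        fmt.Println(indexByID(ids, "primary")) // PRIMARY <nil>
        fmt.Println(indexByID(ids, "missing")) // error
    }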

func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {