Skip to content

Commit

Permalink
[stats] stats table name sensitivity tests (#8684)
Browse files Browse the repository at this point in the history
* table name sensitivity tests

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* fix bats

* build issue

* disable stats collection for slow bats test

* use dsess.SqlDatabase for branchdb

* see if disabling bootstrap makes a difference for timeout test

* no lambda bats for slow diff query

* undo lambda change

* nick comments

---------

Co-authored-by: max-hoffman <[email protected]>
  • Loading branch information
max-hoffman and max-hoffman authored Dec 23, 2024
1 parent 049dea8 commit d98baaf
Show file tree
Hide file tree
Showing 12 changed files with 198 additions and 65 deletions.
4 changes: 2 additions & 2 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1663,9 +1663,9 @@ func TestStatsHistograms(t *testing.T) {

// TestStatsStorage forces a provider reload in between setup and assertions,
// which forces a round trip of the statistics table before inspecting values.
func TestStatsIO(t *testing.T) {
func TestStatsStorage(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunStatsIOTests(t, h)
RunStatsStorageTests(t, h)
}

func TestStatsIOWithoutReload(t *testing.T) {
Expand Down
6 changes: 3 additions & 3 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1553,8 +1553,8 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) {
}
}

func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
Expand All @@ -1569,7 +1569,7 @@ func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
}

func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
Expand Down
79 changes: 75 additions & 4 deletions go/libraries/doltcore/sqle/enginetest/stats_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ var DoltHistogramTests = []queries.ScriptTest{
},
}

var DoltStatsIOTests = []queries.ScriptTest{
var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "single-table",
SetUpScript: []string{
Expand Down Expand Up @@ -569,6 +569,73 @@ var DoltStatsIOTests = []queries.ScriptTest{
},
},
},
{
Name: "differentiate table cases",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table XY (x bigint primary key, y varchar(16))",
"insert into XY values (0,'0'), (1,'1'), (2,'2')",
"analyze table XY",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "deleted table loads OK",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"CREATE table uv (u bigint primary key, v varchar(16))",
"insert into uv values (0,'0'), (1,'1'), (2,'2')",
"analyze table uv",
"drop table uv",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "differentiate branch names",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main,feat'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"call dolt_checkout('-b', 'feat')",
"CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
"insert into xy values (3,'3',3)",
"analyze table xy",
"call dolt_checkout('main')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
{
Query: "call dolt_checkout('feat')",
},
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "3"}},
},
},
},
{
Name: "drop primary key",
SetUpScript: []string{
Expand Down Expand Up @@ -963,11 +1030,15 @@ func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine,
t.Errorf("expected *gms.Engine but found: %T", e)
}

branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
brCopy := make([]string, len(branches))
copy(brCopy, branches)
err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
require.NoError(t, err)

err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", "main")
require.NoError(t, err)
for _, branch := range brCopy {
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
require.NoError(t, err)
}
}

for _, assertion := range assertions {
Expand Down
35 changes: 25 additions & 10 deletions go/libraries/doltcore/sqle/statsnoms/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,20 @@ func (n *NomsStatsDatabase) Branches() []string {
}

func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
if ok, err := n.SchemaChange(ctx, branch); err != nil {
branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)

dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
return nil
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}

if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
return err
} else if ok {
ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
Expand All @@ -164,7 +177,7 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return n.trackBranch(ctx, branch)
}

doltStats, err := loadStats(ctx, n.sourceDb, statsMap)
doltStats, err := loadStats(ctx, branchQDb, statsMap)
if err != nil {
return err
}
Expand All @@ -176,12 +189,12 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return nil
}

func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool, error) {
root, err := n.sourceDb.GetRoot(ctx)
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
root, err := branchQDb.GetRoot(ctx)
if err != nil {
return false, err
}
tables, err := n.sourceDb.GetTableNames(ctx)
tables, err := branchQDb.GetTableNames(ctx)
if err != nil {
return false, err
}
Expand All @@ -201,7 +214,7 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
return false, err
}

keys = append(keys, branch+"/"+tableName)
keys = append(keys, n.schemaTupleKey(branch, tableName))
schHashes = append(schHashes, curHash)
}

Expand All @@ -217,8 +230,6 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
schemaChange = true
break
}
} else if err != nil {
return false, err
}
}
if schemaChange {
Expand Down Expand Up @@ -438,7 +449,7 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
if strings.EqualFold(branch, b) {
return n.schemaHashes[i][tableName], nil
}
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, branch+"/"+tableName); ok {
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
if err != nil {
return hash.Hash{}, err
}
Expand All @@ -453,6 +464,10 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
return hash.Hash{}, nil
}

// schemaTupleKey builds the tuple key used for a table's schema hash. The key
// has the form "<source database name>/<branch>/<table>"; it is the key that
// SchemaChange, GetSchemaHash and SetSchemaHash pass to the destination
// database's tuple store.
// NOTE(review): the parts are joined verbatim, so a branch or table name that
// itself contains "/" could presumably collide with another key — confirm
// whether callers need to guard against that.
func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
	parts := []string{n.sourceDb.Name(), branch, tableName}
	return strings.Join(parts, "/")
}

func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
n.mu.Lock()
defer n.mu.Unlock()
Expand All @@ -471,7 +486,7 @@ func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName
}

n.schemaHashes[branchIdx][tableName] = h
key := branch + "/" + tableName
key := n.schemaTupleKey(branch, tableName)
if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
return err
}
Expand Down
99 changes: 63 additions & 36 deletions go/libraries/doltcore/sqle/statsnoms/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
return nil, err
}
currentStat := statspro.NewDoltStats()
invalidTables := make(map[string]bool)
for {
row, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
Expand Down Expand Up @@ -74,27 +75,31 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
}

qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
if _, ok := invalidTables[tableName]; ok {
continue
}

if currentStat.Statistic.Qual.String() != qual.String() {
if !currentStat.Statistic.Qual.Empty() {
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}

currentStat = statspro.NewDoltStats()
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {

tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
if ok {
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
} else if !ok {
ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
invalidTables[qual.Table()] = true
continue
} else if err != nil {
return nil, err
}
}
Expand Down Expand Up @@ -168,18 +173,10 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat.Statistic.Created = createdAt
}
}
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
if !currentStat.Qualifier().Empty() {
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
return qualToStats, nil
}

Expand All @@ -195,14 +192,44 @@ func parseTypeStrings(typs []string) ([]sql.Type, error) {
return ret, nil
}

func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, error) {
func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
root, err := db.GetRoot(ctx)
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}

iat, ok := sqlTable.(sql.IndexAddressable)
if !ok {
return nil, nil, nil, sql.ColSet{}, nil
}

indexes, err := iat.GetIndexes(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}

var sqlIdx sql.Index
for _, i := range indexes {
if strings.EqualFold(i.ID(), qual.Index()) {
sqlIdx = i
break
}
}

if sqlIdx == nil {
return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
}

fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
if !ok {
return nil, nil, sql.ErrTableNotFound.New(qual.Table())
return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}

var idx durable.Index
Expand All @@ -212,25 +239,25 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
idx, err = table.GetIndexRowData(ctx, qual.Index())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}

prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
buffPool := prollyMap.NodeStore().Pool()

if cnt, err := prollyMap.Count(); err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
} else if cnt == 0 {
return nil, keyBuilder, nil
return nil, keyBuilder, nil, sql.ColSet{}, nil
}
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
keyBytes, _, err := firstIter.Next(ctx)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
Expand All @@ -241,10 +268,10 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
}
return firstRow, keyBuilder, nil
return firstRow, keyBuilder, fds, colset, nil
}

func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
Expand Down
Loading

0 comments on commit d98baaf

Please sign in to comment.