From 5ae8af3419a2f255e172c27e821736ae02066b88 Mon Sep 17 00:00:00 2001 From: Max Hoffman Date: Wed, 11 Dec 2024 17:21:39 -0800 Subject: [PATCH] fix preexisting drop datebase issue --- .../doltcore/sqle/statsnoms/database.go | 61 +- .../doltcore/sqle/statspro/analyze.go | 2 +- .../doltcore/sqle/statspro/configure.go | 27 +- .../doltcore/sqle/statspro/initdbhook.go | 3 + integration-tests/bats/stats.bats | 830 +++++++++--------- 5 files changed, 467 insertions(+), 456 deletions(-) diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go index ffc4015ee95..912c6d54541 100644 --- a/go/libraries/doltcore/sqle/statsnoms/database.go +++ b/go/libraries/doltcore/sqle/statsnoms/database.go @@ -146,7 +146,6 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err return err } else if !ok { ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name()) - //log.Printf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name()) if err := n.DeleteBranchStats(ctx, branch, true); err != nil { return err } @@ -463,37 +462,45 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error { n.mu.Lock() defer n.mu.Unlock() + branchIdx := -1 for i, b := range n.branches { if strings.EqualFold(branch, b) { - n.schemaHashes[i][tableName] = h - tagRef := ref.NewTagRef(branch + "/" + tableName) - - if _, ok, err := n.destDb.DbData().Ddb.HasTag(ctx, tagRef.GetPath()); ok { - if err := n.destDb.DbData().Ddb.DeleteTag(ctx, tagRef); err != nil { - return err - } - } else if err != nil { - return err - } + branchIdx = i + break + } + } + if branchIdx < 0 { + branchIdx = len(n.branches) + if err := n.trackBranch(ctx, branch); err != nil { + return err + } + } - branchSpec, err := doltdb.NewCommitSpec(branch) - if err != nil { - return err - } + n.schemaHashes[branchIdx][tableName] = h + tagRef := ref.NewTagRef(branch + "/" + tableName) + if _, ok, err := n.destDb.DbData().Ddb.HasTag(ctx, tagRef.GetPath()); ok { + if err := n.destDb.DbData().Ddb.DeleteTag(ctx, tagRef); err != nil { + return err + } + } else if err != nil { + return err + } - headRef, err := n.destDb.DbData().Rsr.CWBHeadRef() - if err != nil { - return err - } + branchSpec, err := doltdb.NewCommitSpec(branch) + if err != nil { + return err + } - optCmt, err := n.destDb.DbData().Ddb.Resolve(ctx, branchSpec, headRef) - if err != nil { - return err - } + headRef, err := n.destDb.DbData().Rsr.CWBHeadRef() + if err != nil { + return err + } - props := datas.NewTagMeta("stats", "stats@dolt.com", h.String()) - return n.destDb.DbData().Ddb.NewTagAtCommit(ctx, tagRef, optCmt.Commit, props) - } + optCmt, err := n.destDb.DbData().Ddb.Resolve(ctx, branchSpec, headRef) + if err != nil { + return err } - return fmt.Errorf("failed to update schema hash tag") + + props := datas.NewTagMeta("stats", "stats@dolt.com", h.String()) + return n.destDb.DbData().Ddb.NewTagAtCommit(ctx, tagRef, optCmt.Commit, props) } diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go index faf1a39a51b..9749ce33f6b 100644 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ b/go/libraries/doltcore/sqle/statspro/analyze.go @@ -146,7 +146,7 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() { if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil { - return err + return fmt.Errorf("set schema hash error: %w", err) } } else if oldSchHash != schHash { ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch) diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go index 5058912a422..f8492a08b61 100644 --- a/go/libraries/doltcore/sqle/statspro/configure.go +++ b/go/libraries/doltcore/sqle/statspro/configure.go @@ -16,6 +16,7 @@ package statspro import ( "context" + "fmt" "strings" "time" @@ -67,16 +68,16 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co // copy closure variables db := db eg.Go(func() (err error) { - //defer func() { - // if r := recover(); r != nil { - // if str, ok := r.(fmt.Stringer); ok { - // err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String()) - // } else { - // err = fmt.Errorf("%w: %v", ErrFailedToLoad, r) - // } - // return - // } - //}() + defer func() { + if r := recover(); r != nil { + if str, ok := r.(fmt.Stringer); ok { + err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String()) + } else { + err = fmt.Errorf("%w: %v", ErrFailedToLoad, r) + } + return + } + }() fs, err := p.pro.FileSystemForDatabase(db.Name()) if err != nil { @@ -135,7 +136,7 @@ func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDataba // |statPath| is either file://./stat or mem://stat statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir) if err != nil { - ctx.GetLogger().Errorf("initialize stats failure: %s; %s\n", err.Error(), helpMsg) + ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) return } @@ -143,11 +144,11 @@ func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDataba if err = statsDb.LoadBranchStats(ctx, branch); err != nil { // if branch name is invalid, continue loading rest // TODO: differentiate bad branch name from other errors - ctx.GetLogger().Errorf("load stats init failure: %s; %s\n", err.Error(), helpMsg) + ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) continue } if err := statsDb.Flush(ctx, branch); err != nil { - ctx.GetLogger().Errorf("load stats flush failure: %s; %s\n", err.Error(), helpMsg) + ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) continue } } diff --git a/go/libraries/doltcore/sqle/statspro/initdbhook.go b/go/libraries/doltcore/sqle/statspro/initdbhook.go index 17d2cf8a406..16e97c05ec1 100644 --- a/go/libraries/doltcore/sqle/statspro/initdbhook.go +++ b/go/libraries/doltcore/sqle/statspro/initdbhook.go @@ -45,6 +45,8 @@ func NewStatsInitDatabaseHook( return nil } statsProv.setStatDb(dbName, statsDb) + } else { + ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName) } ctx.GetLogger().Debugf("statistics refresh: initialize %s", name) return statsProv.InitAutoRefresh(ctxFactory, name, bThreads) @@ -62,6 +64,7 @@ func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook { if err := db.Close(); err != nil { ctx.GetLogger().Debugf("failed to close stats database: %s", err) } + delete(statsProv.statDbs, name) } } } diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats index b78cadf1769..7cc4c4bf9f2 100644 --- a/integration-tests/bats/stats.bats +++ b/integration-tests/bats/stats.bats @@ -38,572 +38,572 @@ teardown() { cd $BATS_TMPDIR } -#@test "stats: empty initial stats" { - #cd repo2 +@test "stats: empty initial stats" { + cd repo2 - ## disable bootstrap, can only make stats with ANALYZE or background thread - #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" + # disable bootstrap, can only make stats with ANALYZE or background thread + dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" - #dolt sql -q "insert into xy values (0,0), (1,1)" + dolt sql -q "insert into xy values (0,0), (1,1)" - #start_sql_server - #sleep 1 - #stop_sql_server + start_sql_server + sleep 1 + stop_sql_server - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] + + # setting variables doesn't hang or error + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = .5" + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" - ## setting variables doesn't hang or error - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;" - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = .5" - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" + # auto refresh initialize at server startup + start_sql_server - ## auto refresh initialize at server startup - #start_sql_server + # need to trigger at least one refresh cycle + sleep 1 - ## need to trigger at least one refresh cycle - #sleep 1 + # only statistics for non-empty tables are collected + run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name" + [ "$status" -eq 0 ] + [ "${lines[0]}" = "database_name,table_name,index_name" ] + [ "${lines[1]}" = "repo2,xy,primary" ] + [ "${lines[2]}" = "repo2,xy,y" ] - ## only statistics for non-empty tables are collected - #run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name" - #[ "$status" -eq 0 ] - #[ "${lines[0]}" = "database_name,table_name,index_name" ] - #[ "${lines[1]}" = "repo2,xy,primary" ] - #[ "${lines[2]}" = "repo2,xy,y" ] + # appending new chunks picked up + dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;" - ## appending new chunks picked up - #dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;" + sleep 1 - #sleep 1 + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "8" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "8" ] + # updates picked up + dolt sql -q "update xy set y = 2 where x between 100 and 800" - ## updates picked up - #dolt sql -q "update xy set y = 2 where x between 100 and 800" + sleep 1 - #sleep 1 + dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "8" ] +} - #dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "8" ] -#} +@test "stats: bootstrap on server startup" { + cd repo2 -#@test "stats: bootstrap on server startup" { - #cd repo2 + # disable higher precedence auto-update + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;" - ## disable higher precedence auto-update - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;" + dolt sql -q "insert into xy values (0,0), (1,1)" - #dolt sql -q "insert into xy values (0,0), (1,1)" + start_sql_server + stop_sql_server - #start_sql_server - #stop_sql_server + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] +} - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] -#} +@test "stats: auto-update on server startup" { + cd repo2 -#@test "stats: auto-update on server startup" { - #cd repo2 + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0" + dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;" - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;" - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0" - #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] + start_sql_server + run dolt sql -q "insert into xy values (0,0), (1,1)" + sleep 1 + stop_sql_server - #start_sql_server - #run dolt sql -q "insert into xy values (0,0), (1,1)" - #sleep 1 - #stop_sql_server + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] +} + + +@test "stats: only bootstrap server startup" { + cd repo2 + + dolt sql -q "insert into xy values (0,0), (1,1)" + + dolt gc + + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] +} + +@test "stats: encode/decode loop is delimiter safe" { + cd repo2 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] -#} +dolt sql <50% of rows + dolt sql -q "delete from xy where x > 600" - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] - #stop_sql_server + sleep 1 - ## make sure restarted server sees same stats - #start_sql_server - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] - #stop_sql_server -#} + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "4" ] +} -#@test "stats: deletes refresh" { - #cd repo2 +@test "stats: dolt_state_purge cli" { + cd repo2 - #dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;" + dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - ## setting variables doesn't hang or error - #dolt sql -q "set @@persist.dolt_stats_auto_refresh_enabled = 1;" - #dolt sql -q "set @@persist.dolt_stats_auto_refresh_threshold = .5" - #dolt sql -q "set @@persist.dolt_stats_auto_refresh_interval = 1;" + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + dolt sql -q "analyze table xy" #start_sql_server #sleep 1 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "8" ] + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - ## delete >50% of rows - #dolt sql -q "delete from xy where x > 600" + dolt sql -q "call dolt_stats_purge()" - #sleep 1 + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] +} + +@test "stats: dolt_state_purge server" { + cd repo2 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "4" ] -#} + dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" -#@test "stats: dolt_state_purge cli" { - #cd repo2 + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" + start_sql_server - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + sleep 1 - #dolt sql -q "analyze table xy" - ##start_sql_server + dolt sql -q "analyze table xy" - ##sleep 1 + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + dolt sql -q "call dolt_stats_purge()" - #dolt sql -q "call dolt_stats_purge()" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] + + dolt sql -q "analyze table xy" + + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] -#} + stop_sql_server +} -#@test "stats: dolt_state_purge server" { - #cd repo2 +@test "stats: dolt_state_prune cli" { + cd repo2 - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" + dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + dolt sql -q "analyze table xy" #start_sql_server #sleep 1 - #dolt sql -q "analyze table xy" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + dolt sql -q "call dolt_stats_prune()" - #dolt sql -q "call dolt_stats_purge()" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] +} - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] +@test "stats: dolt_state_prune server" { + cd repo2 - #dolt sql -q "analyze table xy" + dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" - #stop_sql_server -#} + start_sql_server -#@test "stats: dolt_state_prune cli" { - #cd repo2 + sleep 1 - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" + dolt sql -q "analyze table xy" - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #dolt sql -q "analyze table xy" - ##start_sql_server + dolt sql -q "call dolt_stats_prune()" - ##sleep 1 + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + stop_sql_server +} - #dolt sql -q "call dolt_stats_prune()" +@test "stats: add/delete table" { + cd repo1 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] -#} + dolt sql -q "insert into ab values (0,0), (1,0), (2,0)" -#@test "stats: dolt_state_prune server" { - #cd repo2 + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" + start_sql_server - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + sleep 1 - #start_sql_server + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #sleep 1 + # add table + dolt sql -q "create table xy (x int primary key, y int)" + # schema changes don't impact the table hash + dolt sql -q "insert into xy values (0,0)" - #dolt sql -q "analyze table xy" + sleep 1 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "1" ] - #dolt sql -q "call dolt_stats_prune()" + dolt sql -q "truncate table xy" - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + sleep 1 - #stop_sql_server -#} + dolt sql -q "select * from xy" -#@test "stats: add/delete table" { - #cd repo1 + dolt sql -q "select * from dolt_statistics where table_name = 'xy'" - #dolt sql -q "insert into ab values (0,0), (1,0), (2,0)" + run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5" - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" + dolt sql -q "drop table xy" - #start_sql_server + run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "0" ] +} - #sleep 1 +@test "stats: add/delete index" { + cd repo2 - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - ## add table - #dolt sql -q "create table xy (x int primary key, y int)" - ## schema changes don't impact the table hash - #dolt sql -q "insert into xy values (0,0)" + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" - #sleep 1 + start_sql_server - #run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "1" ] + sleep 1 - #dolt sql -q "truncate table xy" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] - #sleep 1 + # delete secondary + dolt sql -q "alter table xy drop index y" + # schema changes don't impact the table hash + dolt sql -q "insert into xy values (3,0)" - #dolt sql -q "select * from xy" + sleep 1 - #dolt sql -q "select * from dolt_statistics where table_name = 'xy'" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "1" ] + + dolt sql -q "alter table xy add index yx (y,x)" + # row change to impact table hash + dolt sql -q "insert into xy values (4,0)" - #run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] + sleep 1 - #dolt sql -q "drop table xy" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] +} - #run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "0" ] -#} +@test "stats: most common values" { + cd repo2 -#@test "stats: add/delete index" { - #cd repo2 + dolt sql -q "alter table xy add index y2 (y)" + dolt sql -q "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)" - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" + dolt sql -q "analyze table xy" - ## setting variables doesn't hang or error - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5" - #dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" + run dolt sql -r csv -q "select mcv1, mcv2 from dolt_statistics where index_name = 'y2'" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "1,0" ] +} - #start_sql_server +@test "stats: multi db" { + cd repo1 - #sleep 1 + dolt sql -q "insert into ab values (0,0), (1,1)" - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] + cd ../repo2 - ## delete secondary - #dolt sql -q "alter table xy drop index y" - ## schema changes don't impact the table hash - #dolt sql -q "insert into xy values (3,0)" + dolt sql -q "insert into ab values (0,0), (1,1)" + dolt sql -q "insert into xy values (0,0), (1,1)" - #sleep 1 + cd .. - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "1" ] + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = 0.5" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" - #dolt sql -q "alter table xy add index yx (y,x)" - ## row change to impact table hash - #dolt sql -q "insert into xy values (4,0)" + start_sql_server + sleep 1 - #sleep 1 + dolt sql -q "use repo1" + run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name" + [ "$status" -eq 0 ] + [ "${lines[0]}" = "database_name,table_name,index_name" ] + [ "${lines[1]}" = "repo1,ab,b" ] + [ "${lines[2]}" = "repo1,ab,primary" ] - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "2" ] -#} + run dolt sql -r csv -q "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name" + [ "$status" -eq 0 ] + [ "${lines[0]}" = "database_name,table_name,index_name" ] + [ "${lines[1]}" = "repo2,ab,b" ] + [ "${lines[2]}" = "repo2,ab,primary" ] + [ "${lines[3]}" = "repo2,xy,primary" ] + [ "${lines[4]}" = "repo2,xy,y" ] +} -#@test "stats: most common values" { - #cd repo2 +@test "stats: add/delete database" { + cd repo1 - #dolt sql -q "alter table xy add index y2 (y)" - #dolt sql -q "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)" + # setting variables doesn't hang or error + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5" + dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;" - #dolt sql -q "analyze table xy" + start_sql_server - #run dolt sql -r csv -q "select mcv1, mcv2 from dolt_statistics where index_name = 'y2'" - #[ "$status" -eq 0 ] - #[ "${lines[1]}" = "1,0" ] -#} + dolt sql -q "insert into ab values (0,0), (1,0), (2,0)" + dolt sql < data.py +import random +import os - ## drop repo2 - #dolt sql -q "drop database repo2" +rows = 2*1000*1000+1 - #sleep 1 +def main(): + f = open("data.csv","w+") + f.write("id,hostname\n") - ## we can't access repo2 stats, but still try - #run dolt sql -r csv < data.py -#import random -#import os - -#rows = 2*1000*1000+1 - -#def main(): - #f = open("data.csv","w+") - #f.write("id,hostname\n") - - #for i in range(rows): - #hostname = random.getrandbits(100) - #f.write(f"{i},{hostname}\n") - #if i % (500*1000) == 0: - #print("row :", i) - #f.flush() - - #f.close() - -#if __name__ == "__main__": - #main() -#EOF - - #mkdir repo3 - #cd repo3 - #python3 ../data.py - - #dolt init - #dolt sql -q "create table f (id int primary key, hostname int)" - #dolt table import -u --continue f data.csv - - #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;" - - #run dolt sql -r csv -q "select count(*) from dolt_statistics" - #[ "$status" -eq 0 ] - #[[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false - #[ "${lines[2]}" = "0" ] -#} + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false + [ "${lines[2]}" = "0" ] +} @test "stats: stats delete index schema change" { cd repo2