-
Notifications
You must be signed in to change notification settings - Fork 363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: store database code as code [DET-9180] #9302
Changes from 7 commits
b703f90
2eb61d0
95b8a18
28572d2
b4bda45
53646f9
1848947
7764c28
b075098
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,18 @@ | ||
package db | ||
|
||
import ( | ||
"crypto/sha256" | ||
"encoding/hex" | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
"regexp" | ||
"strconv" | ||
|
||
"github.com/go-pg/migrations/v8" | ||
"github.com/go-pg/pg/v10" | ||
"github.com/jackc/pgconn" | ||
"github.com/jmoiron/sqlx" | ||
"github.com/pkg/errors" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
@@ -111,8 +116,117 @@ func ensureMigrationUpgrade(tx *pg.Tx) error { | |
return nil | ||
} | ||
|
||
func (db *PgDB) readDBCodeAndCheckIfDifferent(dbCodeDir string) (map[string]string, bool, error) { | ||
files, err := os.ReadDir(dbCodeDir) | ||
if err != nil { | ||
return nil, false, fmt.Errorf("reading '%s' directory for database views: %w", dbCodeDir, err) | ||
} | ||
|
||
allCode := "" | ||
fileNamesToSQL := make(map[string]string) | ||
for _, f := range files { | ||
if filepath.Ext(f.Name()) != ".sql" { | ||
continue | ||
} | ||
|
||
filePath := filepath.Join(dbCodeDir, f.Name()) | ||
b, err := os.ReadFile(filePath) //nolint: gosec // We trust dbCodeDir. | ||
if err != nil { | ||
return nil, false, fmt.Errorf("reading view definition file '%s': %w", filePath, err) | ||
} | ||
fileNamesToSQL[f.Name()] = string(b) | ||
allCode += string(b) | ||
} | ||
|
||
// I didn't want to get into deciding when to apply database or code or not but integration | ||
// tests make it really hard to not do this. | ||
hash := sha256.Sum256([]byte(allCode)) | ||
ourHash := hex.EncodeToString(hash[:]) | ||
|
||
// Check if the views_and_triggers_hash table exists. If it doesn't return that we need to create db code. | ||
var tableExists bool | ||
if err = db.sql.QueryRow( | ||
"SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'views_and_triggers_hash')"). | ||
Scan(&tableExists); err != nil { | ||
return nil, false, fmt.Errorf("checking views_and_triggers_hash exists: %w", err) | ||
} | ||
if !tableExists { | ||
return fileNamesToSQL, true, nil | ||
} | ||
|
||
// Check if our hashes match. If they do we can just return we don't need to do anything. | ||
var databaseHash string | ||
if err := db.sql.QueryRow("SELECT hash FROM views_and_triggers_hash").Scan(&databaseHash); err != nil { | ||
return nil, false, fmt.Errorf("getting hash from views_and_triggers_hash: %w", err) | ||
} | ||
if databaseHash == ourHash { | ||
return fileNamesToSQL, false, nil | ||
} | ||
|
||
// Update our hash and return we need to create views and triggers. | ||
if _, err := db.sql.Exec("UPDATE views_and_triggers_hash SET hash = $1", ourHash); err != nil { | ||
return nil, false, fmt.Errorf("updating our database hash: %w", err) | ||
} | ||
return fileNamesToSQL, false, nil | ||
} | ||
|
||
func (db *PgDB) addDBCode(fileNamesToSQL map[string]string) error { | ||
if err := db.withTransaction("determined database views", func(tx *sqlx.Tx) error { | ||
for filePath, sql := range fileNamesToSQL { | ||
if _, err := tx.Exec(sql); err != nil { | ||
return fmt.Errorf("running database view file '%s': %w", filePath, err) | ||
} | ||
} | ||
|
||
return nil | ||
}); err != nil { | ||
return fmt.Errorf("adding determined database views: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (db *PgDB) dropDBCode() error { | ||
// SET search_path since the ALTER DATABASE ... SET SEARCH_PATH won't apply to this connection | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. im confused by this comment - can you elaborate? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
// since it was created after the migration ran. | ||
if _, err := db.sql.Exec(` | ||
DROP SCHEMA IF EXISTS determined_code CASCADE; | ||
CREATE SCHEMA determined_code;`); err != nil { | ||
return fmt.Errorf("removing determined database views so they can be created later: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// This is set in an init in postgres_test_utils.go behind the intg feature flag. | ||
// For normal usages this won't build. For tests we need to serialize access to | ||
// run migrations. | ||
var testOnlyDBLock func(sql *sqlx.DB) (unlock func()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This took me a minute to figure out - a comment indicating it gets initialized and acquired in init() in a test module would be a clarity improvement IMO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like the way it was factored out, though - much cleaner There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added comment |
||
|
||
// Migrate runs the migrations from the specified directory URL. | ||
func (db *PgDB) Migrate(migrationURL string, actions []string) (isNew bool, err error) { | ||
func (db *PgDB) Migrate( | ||
migrationURL string, dbCodeDir string, actions []string, | ||
) (isNew bool, err error) { | ||
if testOnlyDBLock != nil { | ||
// In integration tests, multiple processes can be running this code at once, which can lead to | ||
// errors because PostgreSQL's CREATE TABLE IF NOT EXISTS is not great with concurrency. | ||
cleanup := testOnlyDBLock(db.sql) | ||
defer cleanup() | ||
} | ||
|
||
dbCodeFiles, needToUpdateDBCode, err := db.readDBCodeAndCheckIfDifferent(dbCodeDir) | ||
if err != nil { | ||
return false, err | ||
} | ||
if needToUpdateDBCode { | ||
if err := db.dropDBCode(); err != nil { | ||
return false, err | ||
} | ||
log.Info("database views changed") | ||
} else { | ||
log.Info("database views unchanged, will not updated") | ||
} | ||
|
||
// go-pg/migrations uses go-pg/pg connection API, which is not compatible | ||
// with pgx, so we use a one-off go-pg/pg connection. | ||
pgOpts, err := makeGoPgOpts(db.URL) | ||
|
@@ -139,17 +253,6 @@ func (db *PgDB) Migrate(migrationURL string, actions []string) (isNew bool, err | |
} | ||
}() | ||
|
||
// In integration tests, multiple processes can be running this code at once, which can lead to | ||
// errors because PostgreSQL's CREATE TABLE IF NOT EXISTS is not great with concurrency. | ||
|
||
// Arbitrarily chosen unique consistent ID for the lock. | ||
const MigrationLockID = 0x33ad0708c9bed25b | ||
|
||
_, err = tx.Exec("SELECT pg_advisory_xact_lock(?)", MigrationLockID) | ||
if err != nil { | ||
return false, err | ||
} | ||
|
||
if err = ensureMigrationUpgrade(tx); err != nil { | ||
return false, errors.Wrap(err, "error upgrading migration metadata") | ||
} | ||
|
@@ -186,6 +289,14 @@ func (db *PgDB) Migrate(migrationURL string, actions []string) (isNew bool, err | |
log.Infof("migrated from %d to %d", oldVersion, newVersion) | ||
} | ||
|
||
if newVersion >= 20240502203516 { // Only comes up in testing old data. | ||
if needToUpdateDBCode { | ||
if err := db.addDBCode(dbCodeFiles); err != nil { | ||
return false, err | ||
} | ||
} | ||
} | ||
|
||
log.Info("DB migrations completed") | ||
return oldVersion == 0, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we be returning
true
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no just returning the zero value.
If an error is not nil the other return values should not be looked at. it is really hard to work with code that returns non nil errors and 'meaningful' values.