diff --git a/.gitignore b/.gitignore index 93a35270dd..dff8320b4b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ node_modules .dist /junit .eslintcache +schema_tmp.sql diff --git a/README.md b/README.md index 14b28c3eb8..83adf938e9 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,45 @@ The realm server uses the request accept header to determine the type of request | `text/html` | Card instance URL's should not include the `.json` file extension. This is considered a 404 | Used to request rendered card instance html (this serves the host application) | | `*/*` | We support node-like resolution, which means that the extension is optional | Used to request transpiled executable code modules | +### Database + +Boxel uses a Postgres database. In development, the Postgres database runs within a docker container, `boxel-pg`, that is started as part of `pnpm start:all`. You can manually start and stop the `boxel-pg` docker container using `pnpm start:pg` and `pnpm stop:pg`. The postgres database runs on port 5435 so that it doesn't conflict with a natively installed postgres that may be running on your system. + +When running tests we isolate the database between each test run by actually creating a new database for each test with a random database name (e.g. `test_db_1234567`). The test databases are dropped before the beginning of each test run. + +If you wish to drop the development database you can execute: +``` +pnpm drop-db +``` + +You can then run `pnpm migrate up` or start the realm server to create the database again. + +#### DB Migrations +When the realm server starts up it will automatically run DB migrations that live in the `packages/realm-server/migrations` folder. As part of development you may wish to run migrations manually as well as to create a new migration. 
+ +To create a new migration, from `packages/realm-server`, execute: +``` +pnpm migrate create name-of-migration +``` +This creates a new migration file in `packages/realm-server/migrations`. You can then edit the newly created migration file with the details of your migration. We use `node-pg-migrate` to handle our migrations. You can find the API at https://salsita.github.io/node-pg-migrate. + +To run the migration, execute: +``` +pnpm migrate up +``` + +To revert the migration, execute: +``` +pnpm migrate down +``` + +Boxel also uses SQLite in order to run the DB in the browser as part of running browser tests (and eventually we may run the realm server in the browser to provide a local index). We treat the Postgres database schema as the source of truth and derive the SQLite schema from it. Therefore, once you author and apply a migration, you should generate a new schema SQL file for SQLite. To generate a new SQLite schema, from `packages/realm-server`, execute: +``` +pnpm make-schema +``` +This will create a new SQLite schema based on the current Postgres DB (the schema file will be placed in the `packages/host/config/schema` directory). The schema file's name starts with the same timestamp as the latest migration file. If you forget to generate a new schema file, the next time you start the host app in development or test, you will receive an error that the SQLite schema is out of date. + + ### Matrix Server The boxel platform leverages a Matrix server called Synapse in order to support identity, workflow, and chat behaviors. This project uses a dockerized Matrix server. We have multiple matrix server configurations (currently one for development that uses a persistent DB, and one for testing that uses an in-memory DB). You can find and configure these matrix servers at `packages/matrix/docker/synapse/*`. 
/**
 * Resolves the path of the newest SQLite schema file in `config/schema`.
 *
 * Schema files and realm-server migrations are both named
 * `<timestamp>_<name>`, so the newest one is the last entry after a
 * lexicographic sort. Only timestamp-prefixed files are considered, so stray
 * files in either directory (lint config, editor droppings, notes) can never
 * be mistaken for a schema or a migration.
 *
 * When EMBER_ENV is `development` or `test`, the schema's timestamp must match
 * the newest migration's timestamp. In other environments the migrations
 * directory is not consulted at all.
 *
 * @returns {string} path of the newest schema file
 * @throws {Error} if no schema file exists, or (development/test only) if no
 *   migration exists or the schema is older/newer than the latest migration
 */
function getLatestSchemaFile() {
  const timestampOf = (filename) => filename.replace(/_.*/, '');

  const schemaDir = path.join(__dirname, 'schema');
  const latestSchemaFile = fs
    .readdirSync(schemaDir)
    .filter((f) => /^\d+_.+\.sql$/.test(f))
    .sort()
    .pop();
  if (!latestSchemaFile) {
    throw new Error(
      `Could not find a sqlite schema file in ${schemaDir}--please generate the sqlite schema file`,
    );
  }

  if (['development', 'test'].includes(process.env.EMBER_ENV)) {
    const migrationsDir = path.resolve(
      path.join(__dirname, '..', '..', 'realm-server', 'migrations'),
    );
    const lastMigration = fs
      .readdirSync(migrationsDir)
      .filter((f) => /^\d+_/.test(f))
      .sort()
      .pop();
    if (!lastMigration) {
      throw new Error(`Could not find any migrations in ${migrationsDir}`);
    }
    if (timestampOf(lastMigration) !== timestampOf(latestSchemaFile)) {
      throw new Error(
        `The sqlite schema file is out of date--please regenerate the sqlite schema file`,
      );
    }
  }

  return path.join(schemaDir, latestSchemaFile);
}
Rather JSON just falls - -- back to TEXT which can be recognized as JSON via SQLite JSON functions as - -- part of queries - pristine_doc JSON, - search_doc JSON, - error_doc JSON, - deps JSON, - types JSON, - embedded_html TEXT, - isolated_html TEXT, - indexed_at INTEGER, - -- WARNING SQLite doesn't have a BOOLEAN type, but it does recognize TRUE and - -- FALSE. These values will be returned as 1 and 0 in SQLite result sets - is_deleted BOOLEAN, - PRIMARY KEY (card_url, realm_version) -); - -CREATE TABLE IF NOT EXISTS realm_versions ( - realm_url TEXT NOT NULL PRIMARY KEY, - current_version INTEGER NOT NULL -); - -CREATE INDEX IF NOT EXISTS realm_version_idx ON indexed_cards (realm_version); -CREATE INDEX IF NOT EXISTS realm_url_idx ON indexed_cards (realm_url); -CREATE INDEX IF NOT EXISTS current_version_idx ON realm_versions (current_version); diff --git a/packages/host/config/schema/1712771547705_schema.sql b/packages/host/config/schema/1712771547705_schema.sql new file mode 100644 index 0000000000..ec605c7fa5 --- /dev/null +++ b/packages/host/config/schema/1712771547705_schema.sql @@ -0,0 +1,24 @@ +-- This is auto-generated by packages/realm-server/scripts/convert-to-sqlite.ts +-- Please don't directly modify this file + + CREATE TABLE IF NOT EXISTS indexed_cards ( + card_url TEXT NOT NULL, + realm_version INTEGER NOT NULL, + realm_url TEXT NOT NULL, + pristine_doc JSON, + search_doc JSON, + error_doc JSON, + deps JSON, + types JSON, + embedded_html TEXT, + isolated_html TEXT, + indexed_at INTEGER, + is_deleted BOOLEAN, + PRIMARY KEY ( card_url, realm_version ) +); + + CREATE TABLE IF NOT EXISTS realm_versions ( + realm_url TEXT NOT NULL, + current_version INTEGER NOT NULL, + PRIMARY KEY ( realm_url ) +); \ No newline at end of file diff --git a/packages/realm-server/migrations/1712771547705_initial.js b/packages/realm-server/migrations/1712771547705_initial.js index 62f1e0742f..92e04b9024 100644 --- a/packages/realm-server/migrations/1712771547705_initial.js 
/* eslint-env node */
import { readFileSync, readdirSync, writeFileSync } from 'fs-extra';
import { resolve, join } from 'path';
import {
  parse,
  type CreateTableStmt,
  type AlterTableStmt,
  type Program,
} from 'sql-parser-cst';

// Converts a Postgres schema dump (the file path is the first CLI argument)
// into a SQLite schema file written to packages/host/config/schema. The output
// file is named after the timestamp of the latest realm-server migration.
//
// Currently this script only cares about CREATE TABLE statements and ALTER
// TABLE statements that add primary key constraints. All the other schema
// aspects of the pg_dump are generally beyond the capability of SQLite. Perhaps
// index creation can be added but it will get really tricky fast since SQLite's
// indices are much more simplistic than postgres.

const args = process.argv;
const migrationsDir = resolve(join(__dirname, '..', 'migrations'));
const sqliteSchemaDir = resolve(
  join(__dirname, '..', '..', 'host', 'config', 'schema'),
);
const INDENT = ' ';

let pgDumpFile = args[2];
if (!pgDumpFile) {
  console.error(`please specify the path of the pg_dump file`);
  process.exit(-1);
}
let pgDump = readFileSync(pgDumpFile, 'utf8');

let cst = parse(prepareDump(pgDump), {
  dialect: 'postgresql',
});

// The output is accumulated as fragments that are joined with a single space
// at the end, which is why column rendering has to compensate for the space
// that the join inserts in front of the first column.
let sql: string[] = [
  `
-- This is auto-generated by packages/realm-server/scripts/convert-to-sqlite.ts
-- Please don't directly modify this file

`,
];
for (let statement of cst.statements) {
  if (statement.type !== 'create_table_stmt') {
    continue;
  }
  sql.push('CREATE TABLE IF NOT EXISTS');
  if (
    statement.name.type === 'member_expr' &&
    statement.name.property.type === 'identifier'
  ) {
    // only the table identifier is emitted; the schema qualifier is dropped
    let tableName = statement.name.property.name;
    sql.push(tableName, '(\n');
    createColumns(cst, tableName, statement, sql);
  } else {
    throw new Error(`could not determine table name to be created`);
  }

  sql.push('\n);\n\n');
}

let result = sql.join(' ').trim();
let filename = getSchemaFilename();
let schemaFile = join(sqliteSchemaDir, filename);
writeFileSync(schemaFile, result);
console.log(`created SQLite schema file ${schemaFile}`);

// Appends the column definitions (and the table's primary key constraint, if
// one is declared via ALTER TABLE elsewhere in the dump) for `statement` to
// `sql` as a single comma separated fragment.
//
// Throws when a column uses a named data type or a constraint that this
// script does not know how to express in SQLite, so that an unsupported
// migration fails loudly instead of producing an incomplete schema.
function createColumns(
  cst: Program,
  tableName: string,
  statement: CreateTableStmt,
  sql: string[],
) {
  if (!statement.columns) {
    return;
  }
  let columns: string[] = [];
  for (let item of statement.columns.expr.items) {
    if (item.type !== 'column_definition') {
      continue;
    }
    let column: string[] = [];
    // The first emitted column is preceded by the space that the final
    // `sql.join(' ')` inserts, so it gets one character less of indentation.
    column.push(
      columns.length === 0 ? INDENT.substring(1) : INDENT,
      item.name.name,
    );
    if (item.dataType?.type === 'named_data_type') {
      // multi-word types (e.g. "character varying") arrive as a keyword list;
      // the first keyword is enough to pick the SQLite type
      let dataTypeName = Array.isArray(item.dataType.nameKw)
        ? item.dataType.nameKw[0]
        : item.dataType.nameKw;
      switch (dataTypeName.name) {
        case 'CHARACTER':
        case 'TEXT':
          column.push('TEXT');
          break;
        case 'JSONB':
          // TODO change this to 'BLOB' after we do the sqlite BLOB storage
          // support in CS-6668 for faster performance
          column.push('JSON');
          break;
        case 'BOOLEAN':
          column.push('BOOLEAN');
          break;
        case 'INTEGER':
        case 'BIGINT':
        case 'SMALLINT':
          column.push('INTEGER');
          break;
        default:
          throw new Error(
            `Don't know how to serialize data type ${dataTypeName.name} for column '${item.name.name}'`,
          );
      }
    }
    for (let constraint of item.constraints) {
      switch (constraint.type) {
        case 'constraint_not_null':
          column.push('NOT NULL');
          break;
        case 'constraint_primary_key':
          column.push('PRIMARY KEY');
          break;
        default:
          throw new Error(
            `Don't know how to serialize constraint ${constraint.type} for column '${item.name.name}'`,
          );
      }
    }

    columns.push(column.join(' '));
  }
  let pkConstraint = makePrimaryKeyConstraint(cst, tableName);
  sql.push([...columns, ...(pkConstraint ? [pkConstraint] : [])].join(',\n'));
}

// Looks through the dump's ALTER TABLE statements that target `tableName` and
// renders the primary key they add as a SQLite table constraint, e.g.
// `PRIMARY KEY ( a, b )`. Returns undefined when no primary key is added.
//
// Throws when the table gets more than one primary key, or when an ALTER
// TABLE adds any constraint other than a primary key.
function makePrimaryKeyConstraint(
  cst: Program,
  tableName: string,
): string | undefined {
  let alterTableStmts = cst.statements.filter(
    (s) =>
      s.type === 'alter_table_stmt' &&
      s.table.type === 'table_without_inheritance' &&
      s.table.table.type === 'member_expr' &&
      s.table.table.property.type === 'identifier' &&
      s.table.table.property.name === tableName,
  ) as AlterTableStmt[];
  let pkConstraint: string[] = [];
  for (let alterTableStmt of alterTableStmts) {
    for (let item of alterTableStmt.actions.items) {
      if (item.type === 'alter_action_add_constraint') {
        switch (item.constraint.type) {
          case 'constraint_primary_key': {
            if (pkConstraint.length > 0) {
              throw new Error(
                `encountered multiple primary key constraints for table ${tableName}`,
              );
            }
            if (item.constraint.columns) {
              let columns: string[] = [];
              if (item.constraint.columns.type === 'paren_expr') {
                for (let column of item.constraint.columns.expr.items) {
                  // only plain column identifiers are collected
                  if (
                    column.type === 'index_specification' &&
                    column.expr.type === 'identifier'
                  ) {
                    columns.push(column.expr.name);
                  }
                }
              } else {
                throw new Error(
                  `Don't know how to serialize constraint ${item.constraint.type} for table '${tableName}'`,
                );
              }
              if (columns.length > 0) {
                pkConstraint.push(
                  INDENT,
                  'PRIMARY KEY (',
                  columns.join(', '),
                  ')',
                );
              }
            }
            break;
          }
          default:
            throw new Error(
              `Don't know how to serialize constraint ${item.constraint.type} for table '${tableName}'`,
            );
        }
      }
    }
  }
  if (pkConstraint.length === 0) {
    return undefined;
  }
  return pkConstraint.join(' ');
}

// This strips out all the things that our SQL AST chokes on (it's still in an
// experimental phase for postgresql): `SET ...;` statements and
// `CREATE TYPE ...;` statements.
function prepareDump(sql: string): string {
  let result = sql
    .replace(/\s*SET\s[^;].*;/gm, '')
    .replace(/\s*CREATE\sTYPE\s[^;]*;/gm, '');
  return result;
}

// Derives the schema file name from the latest migration so that the two
// share a timestamp: `<latest migration timestamp>_schema.sql`. Only
// `<timestamp>_<name>` files count as migrations, which keeps lint config and
// other stray files in the migrations folder out of the picture.
//
// Throws when the migrations folder contains no migration files.
function getSchemaFilename(): string {
  let lastFile = readdirSync(migrationsDir)
    .filter((f) => /^\d+_/.test(f))
    .sort()
    .pop();
  if (!lastFile) {
    throw new Error(`could not find any migration files in ${migrationsDir}`);
  }
  return `${lastFile.replace(/_.*/, '')}_schema.sql`;
}
/sri-toolbox@0.2.0: resolution: {integrity: sha512-DQIMWCAr/M7phwo+d3bEfXwSBEwuaJL+SJx9cuqt1Ty7K96ZFoHpYnSbhrQZEr0+0/GtmpKECP8X/R4RyeTAfw==} engines: {node: '>= 0.10.4'}