From 100bceda45632e0a64e965b5f52ce66166f5ade7 Mon Sep 17 00:00:00 2001 From: Evan Hahn Date: Thu, 23 May 2024 22:00:19 +0000 Subject: [PATCH] feat: rebuild SQLite when migrations occur See [#436]. [#436]: https://github.com/digidem/comapeo-core/issues/436 --- package-lock.json | 8 +-- package.json | 2 +- src/constants.js | 3 ++ src/datastore/README.md | 1 + src/datastore/index.js | 4 +- src/lib/drizzle-helpers.js | 48 +++++++++++++++++ src/mapeo-project.js | 57 ++++++++++++++------ test-e2e/migration.js | 101 ++++++++++++++++++++++++++++++++++-- test/data-type.js | 4 ++ test/datastore.js | 55 +++++++++++++++++++- test/icon-api.js | 1 + test/lib/drizzle-helpers.js | 30 +++++++++++ test/translation-api.js | 1 + 13 files changed, 287 insertions(+), 28 deletions(-) create mode 100644 src/lib/drizzle-helpers.js create mode 100644 test/lib/drizzle-helpers.js diff --git a/package-lock.json b/package-lock.json index 79587be4a..96dea9fc5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -38,7 +38,7 @@ "magic-bytes.js": "^1.10.0", "map-obj": "^5.0.2", "mime": "^4.0.3", - "multi-core-indexer": "^1.0.0-alpha.10", + "multi-core-indexer": "^1.0.0", "p-defer": "^4.0.0", "p-event": "^6.0.1", "p-timeout": "^6.1.2", @@ -6372,9 +6372,9 @@ "license": "MIT" }, "node_modules/multi-core-indexer": { - "version": "1.0.0-alpha.10", - "resolved": "https://registry.npmjs.org/multi-core-indexer/-/multi-core-indexer-1.0.0-alpha.10.tgz", - "integrity": "sha512-H9QdpJ/MaelrBZw6jCcsrInE+hwUQmfz/2swtIdQNNh1IHUDGEdPkakjcZAyahpM5iIVz7EqyWO74aC03A3qSA==", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/multi-core-indexer/-/multi-core-indexer-1.0.0.tgz", + "integrity": "sha512-7EEXJuBS+uhpDnNlfEn/PbT09pdCWulI8NHYS9v+t0sEksyD+X5HNsdJKDsIDtnBlYdMzxJRnZwMDVPfp/aPYw==", "dependencies": { "@types/node": "^18.16.19", "@types/streamx": "^2.9.1", diff --git a/package.json b/package.json index d254d6688..af6a5b827 100644 --- a/package.json +++ b/package.json @@ -182,7 +182,7 @@ "magic-bytes.js": "^1.10.0", "map-obj": "^5.0.2", "mime": "^4.0.3", - "multi-core-indexer": "^1.0.0-alpha.10", + "multi-core-indexer": "^1.0.0", "p-defer": "^4.0.0", "p-event": "^6.0.1", "p-timeout": "^6.1.2", diff --git a/src/constants.js b/src/constants.js index a95af9df3..d93929c79 100644 --- a/src/constants.js +++ b/src/constants.js @@ -32,3 +32,6 @@ export const NAMESPACE_SCHEMAS = /** @type {const} */ ({ }) export const SUPPORTED_CONFIG_VERSION = 1 + +// WARNING: This value is persisted. Be careful when changing it. +export const DRIZZLE_MIGRATIONS_TABLE = '__drizzle_migrations' diff --git a/src/datastore/README.md b/src/datastore/README.md index cc1f2a841..e685c7fbb 100644 --- a/src/datastore/README.md +++ b/src/datastore/README.md @@ -19,6 +19,7 @@ const datastore = new DataStore({ // Process entries here using an indexer... }, namespace: 'data', + reindex: false, }) /** @type {MapeoDoc} */ diff --git a/src/datastore/index.js b/src/datastore/index.js index 990146122..6a29e78a6 100644 --- a/src/datastore/index.js +++ b/src/datastore/index.js @@ -51,8 +51,9 @@ export class DataStore extends TypedEmitter { * @param {TNamespace} opts.namespace * @param {(entries: MultiCoreIndexer.Entry<'binary'>[]) => Promise} opts.batch * @param {MultiCoreIndexer.StorageParam} opts.storage + * @param {boolean} opts.reindex */ - constructor({ coreManager, namespace, batch, storage }) { + constructor({ coreManager, namespace, batch, storage, reindex }) { super() this.#coreManager = coreManager this.#namespace = namespace @@ -66,6 +67,7 @@ export class DataStore extends TypedEmitter { this.#coreIndexer = new MultiCoreIndexer(cores, { storage, batch: (entries) => this.#handleEntries(entries), + reindex, }) coreManager.on('add-core', (coreRecord) => { if (coreRecord.namespace !== namespace) return diff --git a/src/lib/drizzle-helpers.js b/src/lib/drizzle-helpers.js new file mode 100644 index 000000000..781bec238 --- /dev/null +++ b/src/lib/drizzle-helpers.js @@ -0,0 +1,48 @@ +import { sql } from 'drizzle-orm' +import { assert } from '../utils.js' +/** @import { BetterSQLite3Database } from 'drizzle-orm/better-sqlite3' */ + +/** + * @param {unknown} queryResult + * @returns {number} + */ +const getNumberResult = (queryResult) => { + assert( + queryResult && + typeof queryResult === 'object' && + 'result' in queryResult && + typeof queryResult.result === 'number', + 'expected query to return proper result' + ) + return queryResult.result +} + +/** + * Get the number of rows in a table using `SELECT COUNT(*)`. + * Returns 0 if the table doesn't exist. + * + * @param {BetterSQLite3Database} db + * @param {string} tableName + * @returns {number} + */ +export const tableCountIfExists = (db, tableName) => + db.transaction((tx) => { + const existsQuery = sql` + SELECT EXISTS ( + SELECT 1 + FROM sqlite_master + WHERE type IS 'table' + AND name IS ${tableName} + ) AS result + ` + const existsResult = tx.get(existsQuery) + const exists = getNumberResult(existsResult) + if (!exists) return 0 + + const countQuery = sql` + SELECT COUNT(*) AS result + FROM ${sql.identifier(tableName)} + ` + const countResult = tx.get(countQuery) + return getNumberResult(countResult) + }) diff --git a/src/mapeo-project.js b/src/mapeo-project.js index 404586d71..13ac5d3fd 100644 --- a/src/mapeo-project.js +++ b/src/mapeo-project.js @@ -6,7 +6,11 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator' import { discoveryKey } from 'hypercore-crypto' import { TypedEmitter } from 'tiny-typed-emitter' -import { NAMESPACES, NAMESPACE_SCHEMAS } from './constants.js' +import { + NAMESPACES, + NAMESPACE_SCHEMAS, + DRIZZLE_MIGRATIONS_TABLE, +} from './constants.js' import { CoreManager } from './core-manager/index.js' import { DataStore } from './datastore/index.js' import { DataType, kCreateWithDocId } from './datatype/index.js' @@ -44,6 +48,7 @@ import { projectKeyToPublicId, valueOf, } from './utils.js' +import { tableCountIfExists } from './lib/drizzle-helpers.js' import { omit } from './lib/omit.js' import { MemberApi } from './member-api.js' import { SyncApi, kHandleDiscoveryKey } from './sync/sync-api.js' @@ -139,11 +144,37 @@ export class MapeoProject extends TypedEmitter { this.#isArchiveDevice = isArchiveDevice ///////// 1. Setup database + this.#sqlite = new Database(dbPath) const db = drizzle(this.#sqlite) - migrate(db, { migrationsFolder: projectMigrationsFolder }) + const migrationsBefore = tableCountIfExists(db, DRIZZLE_MIGRATIONS_TABLE) + migrate(db, { + migrationsFolder: projectMigrationsFolder, + migrationsTable: DRIZZLE_MIGRATIONS_TABLE, + }) + const migrationsAfter = tableCountIfExists(db, DRIZZLE_MIGRATIONS_TABLE) + const reindex = migrationsBefore > 0 && migrationsAfter !== migrationsBefore + + const indexedTables = [ + observationTable, + trackTable, + presetTable, + fieldTable, + coreOwnershipTable, + roleTable, + deviceInfoTable, + iconTable, + translationTable, + remoteDetectionAlertTable, + ] + + ///////// 2. Wipe data if we need to re-index + + if (reindex) { + for (const table of indexedTables) db.delete(table).run() + } - ///////// 2. Setup random-access-storage functions + ///////// 3. Setup random-access-storage functions /** @type {ConstructorParameters[0]['storage']} */ const coreManagerStorage = (name) => @@ -153,7 +184,7 @@ export class MapeoProject extends TypedEmitter { const indexerStorage = (name) => coreStorage(path.join(INDEXER_STORAGE_FOLDER_NAME, name)) - ///////// 3. Create instances + ///////// 4. Create instances this.#coreManager = new CoreManager({ projectSecretKey, @@ -166,18 +197,7 @@ export class MapeoProject extends TypedEmitter { }) this.#indexWriter = new IndexWriter({ - tables: [ - observationTable, - trackTable, - presetTable, - fieldTable, - coreOwnershipTable, - roleTable, - deviceInfoTable, - iconTable, - translationTable, - remoteDetectionAlertTable, - ], + tables: indexedTables, sqlite: this.#sqlite, getWinner, mapDoc: (doc, version) => { @@ -199,6 +219,7 @@ export class MapeoProject extends TypedEmitter { namespace: 'auth', batch: (entries) => this.#indexWriter.batch(entries), storage: indexerStorage, + reindex, }), config: new DataStore({ coreManager: this.#coreManager, @@ -209,12 +230,14 @@ export class MapeoProject extends TypedEmitter { sharedIndexWriter, }), storage: indexerStorage, + reindex, }), data: new DataStore({ coreManager: this.#coreManager, namespace: 'data', batch: (entries) => this.#indexWriter.batch(entries), storage: indexerStorage, + reindex, }), } @@ -363,7 +386,7 @@ export class MapeoProject extends TypedEmitter { dataType: this.#dataTypes.translation, }) - ///////// 4. Replicate local peers automatically + ///////// 5. Replicate local peers automatically // Replicate already connected local peers for (const peer of localPeers.peers) { diff --git a/test-e2e/migration.js b/test-e2e/migration.js index 319e9baa5..aa16b9468 100644 --- a/test-e2e/migration.js +++ b/test-e2e/migration.js @@ -1,18 +1,111 @@ -import test from 'node:test' import { KeyManager } from '@mapeo/crypto' -import RAM from 'random-access-memory' -import { MapeoManager } from '../src/mapeo-manager.js' import Fastify from 'fastify' import assert from 'node:assert/strict' import fsPromises from 'node:fs/promises' +import test from 'node:test' +import RAM from 'random-access-memory' import { temporaryDirectory } from 'tempy' -import { createOldManagerOnVersion2_0_1 } from './utils.js' +import { MapeoManager } from '../src/mapeo-manager.js' +import { + connectPeers, + createManager, + createOldManagerOnVersion2_0_1, + invite, +} from './utils.js' const projectMigrationsFolder = new URL('../drizzle/project', import.meta.url) .pathname const clientMigrationsFolder = new URL('../drizzle/client', import.meta.url) .pathname +test('migrations pick up values that were not previously understood', async (t) => { + // Create Manager 1, which has new data. + + const manager1 = createManager('a', t) + await manager1.setDeviceInfo({ + name: 'a', + deviceType: 'selfHostedServer', + // Old versions shouldn't be able to recognize this. + selfHostedServerDetails: { baseUrl: 'https://comapeo-test.example/' }, + }) + + const projectId = await manager1.createProject({ name: 'test project' }) + const manager1Project = await manager1.getProject(projectId) + + { + const manager1Members = await manager1Project.$member.getMany() + assert( + manager1Members.some( + (member) => + member.selfHostedServerDetails?.baseUrl === + 'https://comapeo-test.example/' + ), + 'test setup: new manager has new data' + ) + } + + // Create Manager 2, which is not yet up to date. + + const manager2DbFolder = temporaryDirectory() + const manager2CoreStorage = temporaryDirectory() + t.after(() => fsPromises.rm(manager2DbFolder, { recursive: true })) + t.after(() => fsPromises.rm(manager2CoreStorage, { recursive: true })) + + const manager2BeforeMigration = await createOldManagerOnVersion2_0_1('b', { + dbFolder: manager2DbFolder, + coreStorage: manager2CoreStorage, + }) + await manager2BeforeMigration.setDeviceInfo({ + name: 'b', + deviceType: 'mobile', + }) + + // Connect them and ensure that Manager 2 doesn't yet know about the new data. + + const disconnect = connectPeers([manager1, manager2BeforeMigration]) + + await invite({ + projectId, + invitor: manager1, + invitees: [manager2BeforeMigration], + }) + + { + const manager2Project = await manager2BeforeMigration.getProject(projectId) + await manager2Project.$sync.waitForSync('initial') + const manager2Members = await manager2Project.$member.getMany() + assert( + !manager2Members.some((member) => 'selfHostedServerDetails' in member), + "test setup: old manager doesn't understand new data (yet)" + ) + + await manager2Project.close() + } + + await disconnect() + + // Migrate Manager 2 and see that it now knows about the data. + + const manager2AfterMigration = createManager('b', t, { + dbFolder: manager2DbFolder, + coreStorage: manager2CoreStorage, + }) + + { + const manager2Project = await manager2AfterMigration.getProject(projectId) + const manager2Members = await manager2Project.$member.getMany() + const serverMember = manager2Members.find( + (member) => member.deviceType === 'selfHostedServer' + ) + assert(serverMember, 'we still have the server member') + assert.equal( + serverMember.selfHostedServerDetails?.baseUrl, + 'https://comapeo-test.example/', + 'migrated manager has new data' + ) + } +}) + test('migration of localDeviceInfo table', async (t) => { const dbFolder = temporaryDirectory() const rootKey = KeyManager.generateRootKey() diff --git a/test/data-type.js b/test/data-type.js index fd78752b9..297e3c4c0 100644 --- a/test/data-type.js +++ b/test/data-type.js @@ -60,6 +60,7 @@ test('private createWithDocId() method', async () => { return indexWriter.batch(entries) }, storage: () => new RAM(), + reindex: false, }) const dataType = new DataType({ dataStore, @@ -95,6 +96,7 @@ test('private createWithDocId() method throws when doc exists', async () => { return indexWriter.batch(entries) }, storage: () => new RAM(), + reindex: false, }) const dataType = new DataType({ dataStore, @@ -316,6 +318,7 @@ async function testenv(opts = {}) { namespace: 'data', batch: async (entries) => indexWriter.batch(entries), storage: () => new RAM(), + reindex: false, }) const configDataStore = new DataStore({ @@ -347,6 +350,7 @@ async function testenv(opts = {}) { return indexed }, storage: () => new RAM(), + reindex: false, }) const translationDataType = new DataType({ diff --git a/test/datastore.js b/test/datastore.js index fc8299cb8..f5da93dde 100644 --- a/test/datastore.js +++ b/test/datastore.js @@ -1,4 +1,4 @@ -import test from 'node:test' +import test, { mock } from 'node:test' import assert from 'node:assert/strict' import { randomBytes } from 'node:crypto' import { DataStore } from '../src/datastore/index.js' @@ -46,6 +46,7 @@ test('read and write', async () => { return {} }, storage: () => new RAM(), + reindex: false, }) const written = await dataStore.write(obs) const coreDiscoveryKey = discoveryKey(writerCore.key) @@ -80,6 +81,7 @@ test('writeRaw and read', async () => { return {} }, storage: () => new RAM(), + reindex: false, }) const buf = Buffer.from('myblob') const versionId = await dataStore.writeRaw(buf) @@ -101,6 +103,7 @@ test('index events', async () => { return {} }, storage: () => new RAM(), + reindex: false, }) dataStore.indexer.on('index-state', (state) => { indexStates.push(omit(state, ['entriesPerSecond'])) @@ -120,3 +123,53 @@ test('index events', async () => { ] assert.deepEqual(indexStates, expectedStates, 'expected index states emitted') }) + +test('re-indexing', async (t) => { + const cm = createCoreManager() + const writerCore = cm.getWriterCore('data').core + await writerCore.ready() + + /** @satisfies {ConstructorParameters[0]} */ + const commonOptions = { + coreManager: cm, + namespace: 'data', + batch: async () => ({}), + storage: RAM.reusable(), + reindex: false, + } + + const dataStore1 = new DataStore({ ...commonOptions }) + const written = await dataStore1.write(obs) + await dataStore1.close() + + const shouldNotBeCalled = mock.fn(() => Promise.resolve({})) + const dataStore2 = new DataStore({ + ...commonOptions, + batch: shouldNotBeCalled, + }) + await once(dataStore2.indexer, 'idle') + assert.equal( + shouldNotBeCalled.mock.callCount(), + 0, + 'test setup: this data store should not re-index' + ) + await dataStore2.close() + + /** @type {string[]} */ + const indexedVersionIds = [] + const dataStore3 = new DataStore({ + ...commonOptions, + batch: async (entries) => { + for (const { index, key } of entries) { + const coreDiscoveryKey = discoveryKey(key) + const versionId = getVersionId({ coreDiscoveryKey, index }) + indexedVersionIds.push(versionId) + } + return {} + }, + reindex: true, + }) + t.after(() => dataStore3.close()) + await once(dataStore3.indexer, 'idle') + assert.deepEqual(indexedVersionIds, [written.versionId], 're-indexing occurs') +}) diff --git a/test/icon-api.js b/test/icon-api.js index 6f8402f28..cbce80d2f 100644 --- a/test/icon-api.js +++ b/test/icon-api.js @@ -684,6 +684,7 @@ function setup({ coreManager: cm, storage: () => new RAM(), batch: async (entries) => indexWriter.batch(entries), + reindex: false, }) const iconDataType = new DataType({ diff --git a/test/lib/drizzle-helpers.js b/test/lib/drizzle-helpers.js new file mode 100644 index 000000000..1244292b8 --- /dev/null +++ b/test/lib/drizzle-helpers.js @@ -0,0 +1,30 @@ +import Database from 'better-sqlite3' +import { drizzle } from 'drizzle-orm/better-sqlite3' +import assert from 'node:assert/strict' +import test, { describe } from 'node:test' +import { tableCountIfExists } from '../../src/lib/drizzle-helpers.js' + +describe('table count if exists', () => { + const db = new Database(':memory:') + + db.exec('CREATE TABLE empty (ignored)') + + db.exec('CREATE TABLE filled (n INT)') + db.exec('INSERT INTO filled (n) VALUES (9)') + db.exec('INSERT INTO filled (n) VALUES (8)') + db.exec('INSERT INTO filled (n) VALUES (7)') + + const driz = drizzle(db) + + test("when table doesn't exist", () => { + assert.equal(tableCountIfExists(driz, 'doesnt_exist'), 0) + }) + + test('when table is empty', () => { + assert.equal(tableCountIfExists(driz, 'empty'), 0) + }) + + test('when table has rows', () => { + assert.equal(tableCountIfExists(driz, 'filled'), 3) + }) +}) diff --git a/test/translation-api.js b/test/translation-api.js index 26c54fa2b..a9b2c4950 100644 --- a/test/translation-api.js +++ b/test/translation-api.js @@ -137,6 +137,7 @@ function setup() { coreManager: cm, storage: () => new RAM(), batch: async (entries) => indexWriter.batch(entries), + reindex: false, }) const dataType = new DataType({