From e425a1fb9c13d5f8e2c9e6d18af2419ef1f7bd17 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 5 May 2021 21:06:51 +0100 Subject: [PATCH 1/7] Introduce an `attrs` to contain db schema state --- synapse/storage/prepare_database.py | 92 +++++++++++++++++------------ 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 3799d46734ae..61a936e7be72 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -1,5 +1,4 @@ -# Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2014 - 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -59,6 +58,21 @@ class UpgradeDatabaseException(PrepareDatabaseException): ) +@attr.s +class _SchemaState: + current_version: int = attr.ib() + """The current schema version of the database""" + + applied_deltas: Collection[str] = attr.ib(factory=tuple) + """Any delta files for `current_version` which have already been applied""" + + upgraded: bool = attr.ib(default=False) + """Whether the current state was reached by applying deltas. + + If False, we have run the full schema for `current_version`, and have applied no + deltas since. If True, we have run some deltas since the original creation.""" + + def prepare_database( db_conn: LoggingDatabaseConnection, database_engine: BaseDatabaseEngine, @@ -96,12 +110,11 @@ def prepare_database( version_info = _get_or_create_schema_state(cur, database_engine) if version_info: - user_version, delta_files, upgraded = version_info logger.info( "%r: Existing schema is %i (+%i deltas)", databases, - user_version, - len(delta_files), + version_info.current_version, + len(version_info.applied_deltas), ) # config should only be None when we are preparing an in-memory SQLite db, @@ -113,16 +126,18 @@ def prepare_database( # if it's a worker app, refuse to upgrade the database, to avoid multiple # workers doing it at once. - if config.worker_app is not None and user_version != SCHEMA_VERSION: + if ( + config.worker_app is not None + and version_info.current_version != SCHEMA_VERSION + ): raise UpgradeDatabaseException( - OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, user_version) + OUTDATED_SCHEMA_ON_WORKER_ERROR + % (SCHEMA_VERSION, version_info.current_version) ) _upgrade_existing_database( cur, - user_version, - delta_files, - upgraded, + version_info, database_engine, config, databases=databases, @@ -261,9 +276,7 @@ def _setup_new_database( _upgrade_existing_database( cur, - current_version=max_current_ver, - applied_delta_files=[], - upgraded=False, + _SchemaState(current_version=max_current_ver), database_engine=database_engine, config=None, databases=databases, @@ -273,9 +286,7 @@ def _setup_new_database( def _upgrade_existing_database( cur: Cursor, - current_version: int, - applied_delta_files: List[str], - upgraded: bool, + current_schema_state: _SchemaState, database_engine: BaseDatabaseEngine, config: Optional[HomeServerConfig], databases: Collection[str], @@ -321,12 +332,8 @@ def _upgrade_existing_database( Args: cur - current_version: The current version of the schema. - applied_delta_files: A list of deltas that have already been applied. - upgraded: Whether the current version was generated by having - applied deltas or from full schema file. If `True` the function - will never apply delta files for the given `current_version`, since - the current_version wasn't generated by applying those delta files. + current_schema_state: The current version of the schema, as + returned by _get_or_create_schema_state database_engine config: None if we are initialising a blank database, otherwise the application @@ -337,13 +344,13 @@ def _upgrade_existing_database( upgrade portions of the delta scripts. """ if is_empty: - assert not applied_delta_files + assert not current_schema_state.applied_deltas else: assert config is_worker = config and config.worker_app is not None - if current_version > SCHEMA_VERSION: + if current_schema_state.current_version > SCHEMA_VERSION: raise ValueError( "Cannot use this database as it is too " + "new for the server to understand" @@ -357,14 +364,14 @@ def _upgrade_existing_database( assert config is not None check_database_before_upgrade(cur, database_engine, config) - start_ver = current_version + start_ver = current_schema_state.current_version # if we got to this schema version by running a full_schema rather than a series # of deltas, we should not run the deltas for this version. - if not upgraded: + if not current_schema_state.upgraded: start_ver += 1 - logger.debug("applied_delta_files: %s", applied_delta_files) + logger.debug("applied_delta_files: %s", current_schema_state.applied_deltas) if isinstance(database_engine, PostgresEngine): specific_engine_extension = ".postgres" @@ -440,7 +447,7 @@ def _upgrade_existing_database( absolute_path = entry.absolute_path logger.debug("Found file: %s (%s)", relative_path, absolute_path) - if relative_path in applied_delta_files: + if relative_path in current_schema_state.applied_deltas: continue root_name, ext = os.path.splitext(file_name) @@ -621,7 +628,7 @@ def execute_statements_from_stream(cur: Cursor, f: TextIO) -> None: def _get_or_create_schema_state( txn: Cursor, database_engine: BaseDatabaseEngine -) -> Optional[Tuple[int, List[str], bool]]: +) -> Optional[_SchemaState]: # Bluntly try creating the schema_version tables. sql_path = os.path.join(schema_path, "common", "schema_version.sql") executescript(txn, sql_path) @@ -629,17 +636,24 @@ def _get_or_create_schema_state( txn.execute("SELECT version, upgraded FROM schema_version") row = txn.fetchone() - if row is not None: - current_version = int(row[0]) - txn.execute( - "SELECT file FROM applied_schema_deltas WHERE version >= ?", - (current_version,), - ) - applied_deltas = [d for d, in txn] - upgraded = bool(row[1]) - return current_version, applied_deltas, upgraded + if row is None: + # new database + return None + + current_version = int(row[0]) + upgraded = bool(row[1]) - return None + txn.execute( + "SELECT file FROM applied_schema_deltas WHERE version >= ?", + (current_version,), + ) + applied_deltas = tuple(d for d, in txn) + + return _SchemaState( + current_version=current_version, + applied_deltas=applied_deltas, + upgraded=upgraded, + ) @attr.s(slots=True) From a7da9df9a5589a0bef9bc3cac5f9bab10ffdff0b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 7 May 2021 13:57:21 +0100 Subject: [PATCH 2/7] Support for ranges of database schemas --- synapse/storage/prepare_database.py | 21 +++++++- synapse/storage/schema/README.md | 53 +++++++++++++++++++ synapse/storage/schema/__init__.py | 11 +++- .../delta/59/13schema_compat_version.sql | 17 ++++++ .../storage/schema/common/schema_version.sql | 7 +++ 5 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/common/delta/59/13schema_compat_version.sql diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 61a936e7be72..fd4558a2b02e 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -63,6 +63,13 @@ class _SchemaState: current_version: int = attr.ib() """The current schema version of the database""" + compat_version: Optional[int] = attr.ib() + """The SCHEMA_VERSION of the oldest version of Synapse for this database + + If this is None, we have an old version of the database without the necessary + table. + """ + applied_deltas: Collection[str] = attr.ib(factory=tuple) """Any delta files for `current_version` which have already been applied""" @@ -276,7 +283,7 @@ def _setup_new_database( _upgrade_existing_database( cur, - _SchemaState(current_version=max_current_ver), + _SchemaState(current_version=max_current_ver, compat_version=None), database_engine=database_engine, config=None, databases=databases, @@ -350,7 +357,10 @@ def _upgrade_existing_database( is_worker = config and config.worker_app is not None - if current_schema_state.current_version > SCHEMA_VERSION: + if ( + current_schema_state.compat_version is not None + and current_schema_state.compat_version > SCHEMA_VERSION + ): raise ValueError( "Cannot use this database as it is too " + "new for the server to understand" @@ -643,6 +653,12 @@ def _get_or_create_schema_state( current_version = int(row[0]) upgraded = bool(row[1]) + compat_version: Optional[int] = None + txn.execute("SELECT compat_version FROM schema_compat_version") + row = txn.fetchone() + if row is not None: + compat_version = int(row[0]) + txn.execute( "SELECT file FROM applied_schema_deltas WHERE version >= ?", (current_version,), @@ -651,6 +667,7 @@ def _get_or_create_schema_state( return _SchemaState( current_version=current_version, + compat_version=compat_version, applied_deltas=applied_deltas, upgraded=upgraded, ) diff --git a/synapse/storage/schema/README.md b/synapse/storage/schema/README.md index 030153db64a8..a0dcdaea0b41 100644 --- a/synapse/storage/schema/README.md +++ b/synapse/storage/schema/README.md @@ -15,6 +15,59 @@ At the time of writing, the following "logical" databases are supported: Addionally, the `common` directory contains schema files for tables which must be present on *all* physical databases. +## Synapse schema versions + +Synapse manages its database schema via "schema versions". These are mainly used to +help avoid confusion if the Synapse codebase is rolled back after the database is +updated. They work as follows: + + * The Synapse codebase defines a constant `synapse.storage.schema.SCHEMA_VERSION` + which represents the expectations made about the database by that version. For + example, as of Synapse v1.33, this is `59`. + + * The database stores a "compatibility version" in + `schema_compat_version.compat_version` which defines the `SCHEMA_VERSION` of the + oldest version of Synapse which will work with the database. On startup, if + `compat_version` is found to be newer than `SCHEMA_VERSION`, Synapse will refuse to + start. + + * Whenever a backwards-incompatible change is made to the database format (normally + via a `delta` file), `schema_compat_version.compat_version` is also updated so that + administrators can not accidentally roll back to a too-old version of Synapse. + +Generally, the goal is to maintain compatibility with at least one or two previous +releases of Synapse, so any substantial change tends to require multiple releases and a +bit of forward-planning to get right. + +As a worked example: we want to remove the `room_stats_historical` table. Here is how it +might pan out. + + 1. Replace any code that *reads* from `room_stats_historical` with alternative + implementations, but keep writing to it in case of rollback to an earlier version. + Also, increase `synapse.storage.schema.SCHEMA_VERSION`. In this + instance, there is no existing code which reads from `room_stats_historical`, so + our starting point is: + + v1.33.0: `SCHEMA_VERSION=59`, `compat_version=59` + + 2. Next (say in Synapse v1.34.0): remove the code that *writes* to + `room_stats_historical`, but don’t yet remove the table in case of rollback to + v1.33.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but + because we have not broken compatibility with v1.33, we do not yet update + `compat_version`. We now have: + + v1.34.0: `SCHEMA_VERSION=60`, `compat_version=59`. + + 3. Later (say in Synapse v1.36.0): we can remove the table altogether. This will + break compatibility with v1.33.0, so we must update `compat_version` accordingly. + There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no + change to the Synapse codebase here. So we end up with: + + v1.36.0: `SCHEMA_VERSION=60`, `compat_version=60`. + +If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to +lean towards doing so. + ## Full schema dumps In the `full_schemas` directories, only the most recently-numbered snapshot is useful diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index f0d9f2316762..0f30e22190ba 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Remember to update this number every time a change is made to database -# schema files, so the users will be informed on server restarts. SCHEMA_VERSION = 59 +"""Represents the expectations made by the codebase about the database schema + +This should be incremented whenever the codebase changes its requirements on the +shape of the database schema (even if those requirements are backwards-compatible with +older versions of Synapse). + +See `README.md `_ for more information on how this +works. +""" diff --git a/synapse/storage/schema/common/delta/59/13schema_compat_version.sql b/synapse/storage/schema/common/delta/59/13schema_compat_version.sql new file mode 100644 index 000000000000..c43fc61831e0 --- /dev/null +++ b/synapse/storage/schema/common/delta/59/13schema_compat_version.sql @@ -0,0 +1,17 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- populate schema_compat_version for the first time. +INSERT INTO schema_compat_version(compat_version) VALUES (59); diff --git a/synapse/storage/schema/common/schema_version.sql b/synapse/storage/schema/common/schema_version.sql index 42e5cb6df593..f41fde5d2d6c 100644 --- a/synapse/storage/schema/common/schema_version.sql +++ b/synapse/storage/schema/common/schema_version.sql @@ -20,6 +20,13 @@ CREATE TABLE IF NOT EXISTS schema_version( CHECK (Lock='X') ); +CREATE TABLE IF NOT EXISTS schema_compat_version( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + -- The SCHEMA_VERSION of the oldest synapse this database can be used with + compat_version INTEGER NOT NULL, + CHECK (Lock='X') +); + CREATE TABLE IF NOT EXISTS applied_schema_deltas( version INTEGER NOT NULL, file TEXT NOT NULL, From ddb76762e2dc61cafd78842a7eab46223487369b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 5 May 2021 23:18:53 +0100 Subject: [PATCH 3/7] changelog --- changelog.d/9933.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9933.misc diff --git a/changelog.d/9933.misc b/changelog.d/9933.misc new file mode 100644 index 000000000000..086002667049 --- /dev/null +++ b/changelog.d/9933.misc @@ -0,0 +1 @@ +Update the database schema versioning to support gradual migration away from legacy tables. From 1aac02f9e2ab7af763f1f140973db009a6fed60e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 9 Jun 2021 16:10:32 +0100 Subject: [PATCH 4/7] update version numbers --- synapse/storage/schema/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/storage/schema/README.md b/synapse/storage/schema/README.md index a0dcdaea0b41..4a2010ffe8b0 100644 --- a/synapse/storage/schema/README.md +++ b/synapse/storage/schema/README.md @@ -23,7 +23,7 @@ updated. They work as follows: * The Synapse codebase defines a constant `synapse.storage.schema.SCHEMA_VERSION` which represents the expectations made about the database by that version. For - example, as of Synapse v1.33, this is `59`. + example, as of Synapse v1.36, this is `59`. * The database stores a "compatibility version" in `schema_compat_version.compat_version` which defines the `SCHEMA_VERSION` of the @@ -48,22 +48,22 @@ might pan out. instance, there is no existing code which reads from `room_stats_historical`, so our starting point is: - v1.33.0: `SCHEMA_VERSION=59`, `compat_version=59` + v1.36.0: `SCHEMA_VERSION=59`, `compat_version=59` - 2. Next (say in Synapse v1.34.0): remove the code that *writes* to + 2. Next (say in Synapse v1.37.0): remove the code that *writes* to `room_stats_historical`, but don’t yet remove the table in case of rollback to - v1.33.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but - because we have not broken compatibility with v1.33, we do not yet update + v1.36.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but + because we have not broken compatibility with v1.36, we do not yet update `compat_version`. We now have: - v1.34.0: `SCHEMA_VERSION=60`, `compat_version=59`. + v1.37.0: `SCHEMA_VERSION=60`, `compat_version=59`. - 3. Later (say in Synapse v1.36.0): we can remove the table altogether. This will - break compatibility with v1.33.0, so we must update `compat_version` accordingly. + 3. Later (say in Synapse v1.38.0): we can remove the table altogether. This will + break compatibility with v1.36.0, so we must update `compat_version` accordingly. There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no change to the Synapse codebase here. So we end up with: - v1.36.0: `SCHEMA_VERSION=60`, `compat_version=60`. + v1.38.0: `SCHEMA_VERSION=60`, `compat_version=60`. If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to lean towards doing so. From f15dc2a5057c4f95d21fa73320f3e1f3a8536f8d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 9 Jun 2021 16:30:58 +0100 Subject: [PATCH 5/7] move `compat_version` definition into the Python --- synapse/storage/prepare_database.py | 14 +++++++++++++- synapse/storage/schema/README.md | 17 ++++++++++------- synapse/storage/schema/__init__.py | 8 ++++++++ .../common/delta/59/13schema_compat_version.sql | 17 ----------------- 4 files changed, 31 insertions(+), 25 deletions(-) delete mode 100644 synapse/storage/schema/common/delta/59/13schema_compat_version.sql diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index fd4558a2b02e..683e5e3b90b4 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.engines.postgres import PostgresEngine -from synapse.storage.schema import SCHEMA_VERSION +from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor logger = logging.getLogger(__name__) @@ -374,6 +374,18 @@ def _upgrade_existing_database( assert config is not None check_database_before_upgrade(cur, database_engine, config) + # update schema_compat_version before we run any upgrades, so that if synapse + # gets downgraded again, it won't try to run against the upgraded database. + if ( + current_schema_state.compat_version is None + or current_schema_state.compat_version < SCHEMA_COMPAT_VERSION + ): + cur.execute("DELETE FROM schema_compat_version") + cur.execute( + "INSERT INTO schema_compat_version(compat_version) VALUES (?)", + (SCHEMA_COMPAT_VERSION,), + ) + start_ver = current_schema_state.current_version # if we got to this schema version by running a full_schema rather than a series diff --git a/synapse/storage/schema/README.md b/synapse/storage/schema/README.md index 4a2010ffe8b0..c15fda6d24da 100644 --- a/synapse/storage/schema/README.md +++ b/synapse/storage/schema/README.md @@ -31,9 +31,12 @@ updated. They work as follows: `compat_version` is found to be newer than `SCHEMA_VERSION`, Synapse will refuse to start. + Synapse automatically updates this field from + `synapse.storage.schema.SCHEMA_COMPAT_VERSION`. + * Whenever a backwards-incompatible change is made to the database format (normally - via a `delta` file), `schema_compat_version.compat_version` is also updated so that - administrators can not accidentally roll back to a too-old version of Synapse. + via a `delta` file), `synapse.storage.schema.SCHEMA_COMPAT_VERSION` is also updated + so that administrators can not accidentally roll back to a too-old version of Synapse. Generally, the goal is to maintain compatibility with at least one or two previous releases of Synapse, so any substantial change tends to require multiple releases and a @@ -48,22 +51,22 @@ might pan out. instance, there is no existing code which reads from `room_stats_historical`, so our starting point is: - v1.36.0: `SCHEMA_VERSION=59`, `compat_version=59` + v1.36.0: `SCHEMA_VERSION=59`, `SCHEMA_COMPAT_VERSION=59` 2. Next (say in Synapse v1.37.0): remove the code that *writes* to `room_stats_historical`, but don’t yet remove the table in case of rollback to v1.36.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but because we have not broken compatibility with v1.36, we do not yet update - `compat_version`. We now have: + `SCHEMA_COMPAT_VERSION`. We now have: - v1.37.0: `SCHEMA_VERSION=60`, `compat_version=59`. + v1.37.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=59`. 3. Later (say in Synapse v1.38.0): we can remove the table altogether. This will - break compatibility with v1.36.0, so we must update `compat_version` accordingly. + break compatibility with v1.36.0, so we must update `SCHEMA_COMPAT_VERSION` accordingly. There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no change to the Synapse codebase here. So we end up with: - v1.38.0: `SCHEMA_VERSION=60`, `compat_version=60`. + v1.38.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=60`. If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to lean towards doing so. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 0f30e22190ba..d36ba1d7737e 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -22,3 +22,11 @@ See `README.md `_ for more information on how this works. """ + + +SCHEMA_COMPAT_VERSION = 59 +"""Limit on how far the synapse codebase can be rolled back without breaking db compat + +This value is stored in the database, and checked on startup. If the value in the +database is greater than SCHEMA_VERSION, then Synapse will refuse to start. +""" diff --git a/synapse/storage/schema/common/delta/59/13schema_compat_version.sql b/synapse/storage/schema/common/delta/59/13schema_compat_version.sql deleted file mode 100644 index c43fc61831e0..000000000000 --- a/synapse/storage/schema/common/delta/59/13schema_compat_version.sql +++ /dev/null @@ -1,17 +0,0 @@ -/* Copyright 2021 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- populate schema_compat_version for the first time. -INSERT INTO schema_compat_version(compat_version) VALUES (59); From 4eae68451d5faac7a8b0c8b32ff026ce7dcecf9d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 9 Jun 2021 17:44:40 +0100 Subject: [PATCH 6/7] Move new docs into the docs hierarchy --- docs/SUMMARY.md | 3 +- docs/development/database_schema.md | 95 +++++++++++++++++++++++++++++ synapse/storage/schema/README.md | 93 +--------------------------- 3 files changed, 99 insertions(+), 92 deletions(-) create mode 100644 docs/development/database_schema.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 8f39ae027001..af2c968c9abe 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -69,6 +69,7 @@ - [Git Usage](dev/git.md) - [Testing]() - [OpenTracing](opentracing.md) + - [Database Schemas](development/database_schema.md) - [Synapse Architecture]() - [Log Contexts](log_contexts.md) - [Replication](replication.md) @@ -84,4 +85,4 @@ - [Scripts]() # Other - - [Dependency Deprecation Policy](deprecation_policy.md) \ No newline at end of file + - [Dependency Deprecation Policy](deprecation_policy.md) diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md new file mode 100644 index 000000000000..d67fdb927475 --- /dev/null +++ b/docs/development/database_schema.md @@ -0,0 +1,95 @@ +# Synapse database schema files + +Synapse's database schema is stored in the `synapse.storage.schema` module. + +## Logical databases + +Synapse supports splitting its datastore across multiple physical databases (which can +be useful for large installations), and the schema files are therefore split according +to the logical database they are apply to. + +At the time of writing, the following "logical" databases are supported: + +* `state` - used to store Matrix room state (more specifically, `state_groups`, + their relationships and contents.) +* `main` - stores everything else. + +Addionally, the `common` directory contains schema files for tables which must be +present on *all* physical databases. + +## Synapse schema versions + +Synapse manages its database schema via "schema versions". These are mainly used to +help avoid confusion if the Synapse codebase is rolled back after the database is +updated. They work as follows: + + * The Synapse codebase defines a constant `synapse.storage.schema.SCHEMA_VERSION` + which represents the expectations made about the database by that version. For + example, as of Synapse v1.36, this is `59`. + + * The database stores a "compatibility version" in + `schema_compat_version.compat_version` which defines the `SCHEMA_VERSION` of the + oldest version of Synapse which will work with the database. On startup, if + `compat_version` is found to be newer than `SCHEMA_VERSION`, Synapse will refuse to + start. + + Synapse automatically updates this field from + `synapse.storage.schema.SCHEMA_COMPAT_VERSION`. + + * Whenever a backwards-incompatible change is made to the database format (normally + via a `delta` file), `synapse.storage.schema.SCHEMA_COMPAT_VERSION` is also updated + so that administrators can not accidentally roll back to a too-old version of Synapse. + +Generally, the goal is to maintain compatibility with at least one or two previous +releases of Synapse, so any substantial change tends to require multiple releases and a +bit of forward-planning to get right. + +As a worked example: we want to remove the `room_stats_historical` table. Here is how it +might pan out. + + 1. Replace any code that *reads* from `room_stats_historical` with alternative + implementations, but keep writing to it in case of rollback to an earlier version. + Also, increase `synapse.storage.schema.SCHEMA_VERSION`. In this + instance, there is no existing code which reads from `room_stats_historical`, so + our starting point is: + + v1.36.0: `SCHEMA_VERSION=59`, `SCHEMA_COMPAT_VERSION=59` + + 2. Next (say in Synapse v1.37.0): remove the code that *writes* to + `room_stats_historical`, but don’t yet remove the table in case of rollback to + v1.36.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but + because we have not broken compatibility with v1.36, we do not yet update + `SCHEMA_COMPAT_VERSION`. We now have: + + v1.37.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=59`. + + 3. Later (say in Synapse v1.38.0): we can remove the table altogether. This will + break compatibility with v1.36.0, so we must update `SCHEMA_COMPAT_VERSION` accordingly. + There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no + change to the Synapse codebase here. So we end up with: + + v1.38.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=60`. + +If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to +lean towards doing so. + +## Full schema dumps + +In the `full_schemas` directories, only the most recently-numbered snapshot is used +(`54` at the time of writing). Older snapshots (eg, `16`) are present for historical +reference only. + +### Building full schema dumps + +If you want to recreate these schemas, they need to be made from a database that +has had all background updates run. + +To do so, use `scripts-dev/make_full_schema.sh`. This will produce new +`full.sql.postgres` and `full.sql.sqlite` files. + +Ensure postgres is installed, then run: + + ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/ + +NB at the time of writing, this script predates the split into separate `state`/`main` +databases so will require updates to handle that correctly. diff --git a/synapse/storage/schema/README.md b/synapse/storage/schema/README.md index c15fda6d24da..729f44ea6cf4 100644 --- a/synapse/storage/schema/README.md +++ b/synapse/storage/schema/README.md @@ -1,93 +1,4 @@ # Synapse Database Schemas -This directory contains the schema files used to build Synapse databases. - -Synapse supports splitting its datastore across multiple physical databases (which can -be useful for large installations), and the schema files are therefore split according -to the logical database they are apply to. - -At the time of writing, the following "logical" databases are supported: - -* `state` - used to store Matrix room state (more specifically, `state_groups`, - their relationships and contents.) -* `main` - stores everything else. - -Addionally, the `common` directory contains schema files for tables which must be -present on *all* physical databases. - -## Synapse schema versions - -Synapse manages its database schema via "schema versions". These are mainly used to -help avoid confusion if the Synapse codebase is rolled back after the database is -updated. They work as follows: - - * The Synapse codebase defines a constant `synapse.storage.schema.SCHEMA_VERSION` - which represents the expectations made about the database by that version. For - example, as of Synapse v1.36, this is `59`. - - * The database stores a "compatibility version" in - `schema_compat_version.compat_version` which defines the `SCHEMA_VERSION` of the - oldest version of Synapse which will work with the database. On startup, if - `compat_version` is found to be newer than `SCHEMA_VERSION`, Synapse will refuse to - start. - - Synapse automatically updates this field from - `synapse.storage.schema.SCHEMA_COMPAT_VERSION`. - - * Whenever a backwards-incompatible change is made to the database format (normally - via a `delta` file), `synapse.storage.schema.SCHEMA_COMPAT_VERSION` is also updated - so that administrators can not accidentally roll back to a too-old version of Synapse. - -Generally, the goal is to maintain compatibility with at least one or two previous -releases of Synapse, so any substantial change tends to require multiple releases and a -bit of forward-planning to get right. - -As a worked example: we want to remove the `room_stats_historical` table. Here is how it -might pan out. - - 1. Replace any code that *reads* from `room_stats_historical` with alternative - implementations, but keep writing to it in case of rollback to an earlier version. - Also, increase `synapse.storage.schema.SCHEMA_VERSION`. In this - instance, there is no existing code which reads from `room_stats_historical`, so - our starting point is: - - v1.36.0: `SCHEMA_VERSION=59`, `SCHEMA_COMPAT_VERSION=59` - - 2. Next (say in Synapse v1.37.0): remove the code that *writes* to - `room_stats_historical`, but don’t yet remove the table in case of rollback to - v1.36.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but - because we have not broken compatibility with v1.36, we do not yet update - `SCHEMA_COMPAT_VERSION`. We now have: - - v1.37.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=59`. - - 3. Later (say in Synapse v1.38.0): we can remove the table altogether. This will - break compatibility with v1.36.0, so we must update `SCHEMA_COMPAT_VERSION` accordingly. - There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no - change to the Synapse codebase here. So we end up with: - - v1.38.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=60`. - -If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to -lean towards doing so. - -## Full schema dumps - -In the `full_schemas` directories, only the most recently-numbered snapshot is useful -(`54` at the time of writing). Older snapshots (eg, `16`) are present for historical -reference only. - -## Building full schema dumps - -If you want to recreate these schemas, they need to be made from a database that -has had all background updates run. - -To do so, use `scripts-dev/make_full_schema.sh`. This will produce new -`full.sql.postgres` and `full.sql.sqlite` files. - -Ensure postgres is installed, then run: - - ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/ - -NB at the time of writing, this script predates the split into separate `state`/`main` -databases so will require updates to handle that correctly. +This directory contains the schema files used to build Synapse databases. For more +information, see /docs/development/database_schema.md. From 3ad9e108262832be6c372edf48533675d0279cb6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Jun 2021 13:36:29 +0100 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- docs/development/database_schema.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md index d67fdb927475..7fe8ec63e1c8 100644 --- a/docs/development/database_schema.md +++ b/docs/development/database_schema.md @@ -6,15 +6,15 @@ Synapse's database schema is stored in the `synapse.storage.schema` module. Synapse supports splitting its datastore across multiple physical databases (which can be useful for large installations), and the schema files are therefore split according -to the logical database they are apply to. +to the logical database they apply to. At the time of writing, the following "logical" databases are supported: * `state` - used to store Matrix room state (more specifically, `state_groups`, - their relationships and contents.) + their relationships and contents). * `main` - stores everything else. -Addionally, the `common` directory contains schema files for tables which must be +Additionally, the `common` directory contains schema files for tables which must be present on *all* physical databases. ## Synapse schema versions