From baafb85ba461b7d5073de94422fed0c43a417f46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 May 2017 11:57:02 +0100 Subject: [PATCH 1/4] Add an index to event_search - to make the purge API quicker --- synapse/storage/background_updates.py | 10 +++++++--- synapse/storage/events.py | 11 +++++++++++ .../delta/41/event_search_event_id_idx.sql | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 synapse/storage/schema/delta/41/event_search_event_id_idx.sql diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index d4cf0fc59bb0..12a8b8259cd1 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -210,7 +210,8 @@ def register_background_update_handler(self, update_name, update_handler): self._background_update_handlers[update_name] = update_handler def register_background_index_update(self, update_name, index_name, - table, columns, where_clause=None): + table, columns, where_clause=None, + unique=False): """Helper for store classes to do a background index addition To use: @@ -245,9 +246,11 @@ def create_index_psql(conn): c.execute(sql) sql = ( - "CREATE INDEX CONCURRENTLY %(name)s ON %(table)s" + "CREATE %(unique)s INDEX CONCURRENTLY %(name)s" + " ON %(table)s" " (%(columns)s) %(where_clause)s" ) % { + "unique": "UNIQUE" if unique else "", "name": index_name, "table": table, "columns": ", ".join(columns), @@ -270,9 +273,10 @@ def create_index_sqlite(conn): # down at the wrong moment - hance we use IF NOT EXISTS. (SQLite # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.) sql = ( - "CREATE INDEX IF NOT EXISTS %(name)s ON %(table)s" + "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s" " (%(columns)s)" ) % { + "unique": "UNIQUE" if unique else "", "name": index_name, "table": table, "columns": ", ".join(columns), diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dbd63078c6c7..1fae1aeacfd8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -207,6 +207,17 @@ def __init__(self, hs): where_clause="contains_url = true AND outlier = false", ) + # an event_id index on event_search is useful for the purge_history + # api. Plus it means we get to enforce some integrity with a UNIQUE + # clause + self.register_background_index_update( + "event_search_event_id_idx", + index_name="event_search_event_id_idx", + table="event_search", + columns=["event_id"], + unique=True, + ) + self._event_persist_queue = _EventPeristenceQueue() def persist_events(self, events_and_contexts, backfilled=False): diff --git a/synapse/storage/schema/delta/41/event_search_event_id_idx.sql b/synapse/storage/schema/delta/41/event_search_event_id_idx.sql new file mode 100644 index 000000000000..5d9cfecf361e --- /dev/null +++ b/synapse/storage/schema/delta/41/event_search_event_id_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_search_event_id_idx', '{}'); From 114f2909479b4396f3da8cff651990f075b4bfba Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 May 2017 12:06:28 +0100 Subject: [PATCH 2/4] Add more logging for purging Log the number of events we will be deleting at info. --- synapse/storage/events.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1fae1aeacfd8..627e91a52234 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2033,6 +2033,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): 400, "topological_ordering is greater than forward extremeties" ) + logger.debug("[purge] looking for events to delete") + txn.execute( "SELECT event_id, state_key FROM events" " LEFT JOIN state_events USING (room_id, event_id)" @@ -2041,6 +2043,14 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) event_rows = txn.fetchall() + to_delete = [ + (event_id,) for event_id, state_key in event_rows + if state_key is None and not self.hs.is_mine_id(event_id) + ] + logger.info( + "[purge] found %i events before cutoff, of which %i are remote" + " non-state events to delete", len(event_rows), len(to_delete)) + for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) @@ -2091,6 +2101,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) state_rows = txn.fetchall() + logger.debug("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently @@ -2184,10 +2195,6 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) # Delete all remote non-state events - to_delete = [ - (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) - ] for table in ( "events", "event_json", @@ -2203,7 +2210,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): "event_signatures", "rejections", ): - logger.debug("[purge] removing non-state events from %s", table) + logger.debug("[purge] removing remote non-state events from %s", table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2211,7 +2218,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) # Mark all state and own events as outliers - logger.debug("[purge] marking events as outliers") + logger.debug("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", @@ -2221,7 +2228,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ] ) - logger.debug("[purge] done") + logger.info("[purge] done") @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): From 34194aaff7c86d57c6dabfb016b7597d999b2001 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 May 2017 12:46:55 +0100 Subject: [PATCH 3/4] Don't create event_search index on sqlite ... because the table is virtual --- synapse/storage/background_updates.py | 13 ++++++++++--- synapse/storage/events.py | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 12a8b8259cd1..7157fb1dfbc2 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -211,7 +211,8 @@ def register_background_update_handler(self, update_name, update_handler): def register_background_index_update(self, update_name, index_name, table, columns, where_clause=None, - unique=False): + unique=False, + psql_only=False): """Helper for store classes to do a background index addition To use: @@ -227,6 +228,9 @@ def register_background_index_update(self, update_name, index_name, index_name (str): name of index to add table (str): table to add index to columns (list[str]): columns/expressions to include in index + unique (bool): true to make a UNIQUE index + psql_only: true to only create this index on psql databases (useful + for virtual sqlite tables) """ def create_index_psql(conn): @@ -288,13 +292,16 @@ def create_index_sqlite(conn): if isinstance(self.database_engine, engines.PostgresEngine): runner = create_index_psql + elif psql_only: + runner = None else: runner = create_index_sqlite @defer.inlineCallbacks def updater(progress, batch_size): - logger.info("Adding index %s to %s", index_name, table) - yield self.runWithConnection(runner) + if runner is not None: + logger.info("Adding index %s to %s", index_name, table) + yield self.runWithConnection(runner) yield self._end_background_update(update_name) defer.returnValue(1) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 627e91a52234..ea6879c61941 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -216,6 +216,7 @@ def __init__(self, hs): table="event_search", columns=["event_id"], unique=True, + psql_only=True, ) self._event_persist_queue = _EventPeristenceQueue() From ff3d810ea8e84a48508a08e6246c7d70739c94ea Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 May 2017 12:48:50 +0100 Subject: [PATCH 4/4] Add a comment to old delta --- synapse/storage/schema/delta/37/remove_auth_idx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/schema/delta/37/remove_auth_idx.py b/synapse/storage/schema/delta/37/remove_auth_idx.py index 784f3b348f3b..20ad8bd5a653 100644 --- a/synapse/storage/schema/delta/37/remove_auth_idx.py +++ b/synapse/storage/schema/delta/37/remove_auth_idx.py @@ -36,6 +36,10 @@ -- and is used incredibly rarely. DROP INDEX IF EXISTS events_order_topo_stream_room; +-- an equivalent index to this actually gets re-created in delta 41, because it +-- turned out that deleting it wasn't a great plan :/. In any case, let's +-- delete it here, and delta 41 will create a new one with an added UNIQUE +-- constraint DROP INDEX IF EXISTS event_search_ev_idx; """