From c877f0f0345f1ff6d329af2920d7d1a6b5659a86 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 16:41:39 +0100 Subject: [PATCH 01/13] Optimise event_search in postgres --- synapse/storage/room.py | 16 +++-- .../storage/schema/delta/31/search_update.py | 65 +++++++++++++++++++ synapse/storage/search.py | 61 ++++++++++++++++- 3 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/31/search_update.py diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9be977f387fb..70aa64fb3105 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -169,20 +169,28 @@ def _store_content_index_txn(self, txn, event, key): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) else: # This should be unreachable. raise Exception("Unrecognized database engine") - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py new file mode 100644 index 000000000000..46a3795d122a --- /dev/null +++ b/synapse/storage/schema/delta/31/search_update.py @@ -0,0 +1,65 @@ +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine +from synapse.storage.prepare_database import get_statements + +import logging +import ujson + +logger = logging.getLogger(__name__) + + +ALTER_TABLE = """ +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; + +CREATE INDEX event_search_room_order ON event_search( + room_id, origin_server_ts, stream_ordering +); +CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); +""" + + +def run_create(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + return + + for statement in get_statements(ALTER_TABLE.splitlines()): + cur.execute(statement) + + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] + + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + + sql = ( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)" + ) + + sql = database_engine.convert_param_style(sql) + + cur.execute(sql, ("event_search_order", progress_json)) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 59ac7f424c20..375057fa3e1e 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -29,12 +29,17 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" + EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" def __init__(self, hs): super(SearchStore, self).__init__(hs) self.register_background_update_handler( self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search ) + self.register_background_update_handler( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, + self._background_reindex_search_order + ) @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): @@ -131,6 +136,61 @@ def reindex_search_txn(txn): defer.returnValue(result) + @defer.inlineCallbacks + def _background_reindex_search_order(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, origin_server_ts, event_id FROM events" + " INNER JOIN event_search USING (room_id, event_id)" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + + sql = ( + "UPDATE event_search SET stream_ordering = ?, origin_server_ts = ?" + " WHERE event_id = ?" + ) + + for index in range(0, len(rows), INSERT_CLUMP_SIZE): + clump = rows[index:index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows) + } + + self._background_update_progress_txn( + txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress + ) + + return len(rows) + + result = yield self.runInteraction( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_SEARCH_ORDER_UPDATE_NAME) + + defer.returnValue(result) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. @@ -310,7 +370,6 @@ def search_rooms(self, room_ids, search_term, keys, limit, pagination_token=None "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank," " origin_server_ts, stream_ordering, room_id, event_id" " FROM event_search" - " NATURAL JOIN events" " WHERE vector @@ to_tsquery('english', ?) AND " ) args = [search_query, search_query] + args From b743c1237e68e75056b83ea4ab93ba2e1ec44b7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:12:04 +0100 Subject: [PATCH 02/13] Add missing run_upgrade --- synapse/storage/schema/delta/31/search_update.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 46a3795d122a..989e990dbdcd 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -63,3 +63,7 @@ def run_create(cur, database_engine, *args, **kwargs): sql = database_engine.convert_param_style(sql) cur.execute(sql, ("event_search_order", progress_json)) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + pass From 51bb339ab2130ab29ee9fcfec48d8e62f46c75f6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:16:11 +0100 Subject: [PATCH 03/13] Create index concurrently --- synapse/storage/schema/delta/31/search_update.py | 6 +----- synapse/storage/search.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 989e990dbdcd..470ae0c00559 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -24,11 +24,6 @@ ALTER_TABLE = """ ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; - -CREATE INDEX event_search_room_order ON event_search( - room_id, origin_server_ts, stream_ordering -); -CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); """ @@ -52,6 +47,7 @@ def run_create(cur, database_engine, *args, **kwargs): "target_min_stream_id_inclusive": min_stream_id, "max_stream_id_exclusive": max_stream_id + 1, "rows_inserted": 0, + "have_added_indexes": False, } progress_json = ujson.dumps(progress) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 375057fa3e1e..548e9eeaef6c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -141,10 +141,21 @@ def _background_reindex_search_order(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) + have_added_index = progress['have_added_indexes'] INSERT_CLUMP_SIZE = 1000 def reindex_search_txn(txn): + if not have_added_index: + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" + "room_id, origin_server_ts, stream_ordering)" + ) + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" + "origin_server_ts, stream_ordering)" + ) + sql = ( "SELECT stream_ordering, origin_server_ts, event_id FROM events" " INNER JOIN event_search USING (room_id, event_id)" @@ -173,7 +184,8 @@ def reindex_search_txn(txn): progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows) + "rows_inserted": rows_inserted + len(rows), + "have_added_index": True, } self._background_update_progress_txn( From 129e4034870956126daf520b41f74a51040ddbf9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:19:25 +0100 Subject: [PATCH 04/13] Create index must be on a conn --- synapse/storage/search.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 548e9eeaef6c..05641fb5795a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -143,19 +143,26 @@ def _background_reindex_search_order(self, progress, batch_size): rows_inserted = progress.get("rows_inserted", 0) have_added_index = progress['have_added_indexes'] - INSERT_CLUMP_SIZE = 1000 - - def reindex_search_txn(txn): - if not have_added_index: - txn.execute( + if not have_added_index: + def create_index(conn): + conn.rollback() + conn.set_session(autocommit=True) + c = conn.cursor() + c.execute( "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" "room_id, origin_server_ts, stream_ordering)" ) - txn.execute( + c.execute( "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" "origin_server_ts, stream_ordering)" ) + conn.set_session(autocommit=False) + + yield self.runWithConnection(create_index) + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): sql = ( "SELECT stream_ordering, origin_server_ts, event_id FROM events" " INNER JOIN event_search USING (room_id, event_id)" From 3b0fa77f5050bb54dccc5140a76b171d6603f2e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:37:42 +0100 Subject: [PATCH 05/13] Fix SQL statement --- synapse/storage/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 05641fb5795a..f7730f5d7c79 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -164,10 +164,10 @@ def create_index(conn): def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, origin_server_ts, event_id FROM events" + "SELECT e.stream_ordering, e.origin_server_ts, event_id FROM events as e" " INNER JOIN event_search USING (room_id, event_id)" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" + " WHERE ? <= e.stream_ordering AND e.stream_ordering < ?" + " ORDER BY e.stream_ordering DESC" " LIMIT ?" ) From e395eb1108abac2ada1f846d08285a84fd5042a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:39:24 +0100 Subject: [PATCH 06/13] Update progress when creating index --- synapse/storage/search.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f7730f5d7c79..1baed674a8d7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -158,6 +158,13 @@ def create_index(conn): ) conn.set_session(autocommit=False) + pg = dict(progress) + pg["have_added_indexes"] = True + + self._background_update_progress_txn( + conn.cursor(), self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress + ) + yield self.runWithConnection(create_index) INSERT_CLUMP_SIZE = 1000 From 26db18bc90e745e1eb054e2ffd6969918a6253c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:45:56 +0100 Subject: [PATCH 07/13] Need to do _background_update_progress_txn in actual transaction --- synapse/storage/search.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 1baed674a8d7..fd40b44a9a2a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -158,14 +158,16 @@ def create_index(conn): ) conn.set_session(autocommit=False) - pg = dict(progress) - pg["have_added_indexes"] = True + yield self.runWithConnection(create_index) - self._background_update_progress_txn( - conn.cursor(), self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress - ) + pg = dict(progress) + pg["have_added_indexes"] = True - yield self.runWithConnection(create_index) + yield self.runInteraction( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, + self._background_update_progress_txn, + self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, + ) INSERT_CLUMP_SIZE = 1000 From b57dcb4b51d31914756412ec00d94646bc3b4c79 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:49:00 +0100 Subject: [PATCH 08/13] Typo --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index fd40b44a9a2a..dc47425c239c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -201,7 +201,7 @@ def reindex_search_txn(txn): "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, "rows_inserted": rows_inserted + len(rows), - "have_added_index": True, + "have_added_indexes": True, } self._background_update_progress_txn( From 8fae3d7b1eea87b48db96f1671d850a4a247e926 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:01:49 +0100 Subject: [PATCH 09/13] Use special UPDATE syntax --- .../storage/schema/delta/31/search_update.py | 4 +-- synapse/storage/search.py | 32 +++++++------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 470ae0c00559..2c15edd1a48d 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dc47425c239c..813e1e90ace1 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -169,34 +169,26 @@ def create_index(conn): self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, ) - INSERT_CLUMP_SIZE = 1000 - def reindex_search_txn(txn): - sql = ( - "SELECT e.stream_ordering, e.origin_server_ts, event_id FROM events as e" - " INNER JOIN event_search USING (room_id, event_id)" - " WHERE ? <= e.stream_ordering AND e.stream_ordering < ?" - " ORDER BY e.stream_ordering DESC" + events_sql = ( + "SELECT stream_ordering, origin_server_ts, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" " LIMIT ?" ) - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + sql = ( + "UPDATE event_search AS es SET es.stream_ordering = e.stream_ordering," + " es.origin_server_ts = e.origin_server_ts" + " FROM (%s) AS e" + " WHERE e.event_id = es.event_id" + " RETURNING es.stream_ordering" + ) % (events_sql,) + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) rows = txn.fetchall() - if not rows: - return 0 - min_stream_id = rows[-1][0] - sql = ( - "UPDATE event_search SET stream_ordering = ?, origin_server_ts = ?" - " WHERE event_id = ?" - ) - - for index in range(0, len(rows), INSERT_CLUMP_SIZE): - clump = rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, From 3ddbb1687ced5eb9b9a87367fcd6b754b8d0c5dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:02:36 +0100 Subject: [PATCH 10/13] Fix query --- synapse/storage/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 813e1e90ace1..dd3486783d3c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -178,8 +178,8 @@ def reindex_search_txn(txn): ) sql = ( - "UPDATE event_search AS es SET es.stream_ordering = e.stream_ordering," - " es.origin_server_ts = e.origin_server_ts" + "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," + " origin_server_ts = e.origin_server_ts" " FROM (%s) AS e" " WHERE e.event_id = es.event_id" " RETURNING es.stream_ordering" From ae571810f2283c1825da62af0e931a0e40f74168 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:09:48 +0100 Subject: [PATCH 11/13] Order NULLs first --- .../storage/schema/delta/31/search_update.py | 4 ++-- synapse/storage/search.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 2c15edd1a48d..470ae0c00559 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dd3486783d3c..2c71db8c96dd 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -148,13 +148,16 @@ def create_index(conn): conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() + + # We create with NULLS FIRST so that when we search *backwards* + # we get the ones with non null origin_server_ts *first* c.execute( "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" - "room_id, origin_server_ts, stream_ordering)" + "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) c.execute( "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" - "origin_server_ts, stream_ordering)" + "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) conn.set_session(autocommit=False) @@ -434,7 +437,15 @@ def search_rooms(self, room_ids, search_term, keys, limit, pagination_token=None # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. - sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + if isinstance(self.database_engine, PostgresEngine): + sql += ( + " ORDER BY origin_server_ts DESC NULLS LAST," + " stream_ordering DESC NULLS LAST LIMIT ?" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + else: + raise Exception("Unrecognized database engine") args.append(limit) From 183cacac90ca237b448da244270d55920470389b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 Apr 2016 09:37:16 +0100 Subject: [PATCH 12/13] Simplify query and handle finishing correctly --- synapse/storage/background_updates.py | 3 ++- synapse/storage/search.py | 30 +++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 49904046cfc6..66a995157d4b 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -173,11 +173,12 @@ def do_background_update(self, desired_duration_ms): logger.info( "Updating %r. Updated %r items in %rms." - " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", + " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", update_name, items_updated, duration_ms, performance.total_items_per_ms(), performance.average_items_per_ms(), performance.total_item_count, + batch_size, ) performance.update(items_updated, duration_ms) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 2c71db8c96dd..02242996251c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -169,28 +169,26 @@ def create_index(conn): yield self.runInteraction( self.EVENT_SEARCH_ORDER_UPDATE_NAME, self._background_update_progress_txn, - self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, + self.EVENT_SEARCH_ORDER_UPDATE_NAME, pg, ) def reindex_search_txn(txn): - events_sql = ( - "SELECT stream_ordering, origin_server_ts, event_id FROM events" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - sql = ( "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," " origin_server_ts = e.origin_server_ts" - " FROM (%s) AS e" + " FROM events AS e" " WHERE e.event_id = es.event_id" + " AND ? <= e.stream_ordering AND e.stream_ordering < ?" " RETURNING es.stream_ordering" - ) % (events_sql,) + ) - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + min_stream_id = max_stream_id - batch_size + txn.execute(sql, (min_stream_id, max_stream_id)) rows = txn.fetchall() - min_stream_id = rows[-1][0] + + if min_stream_id < target_min_stream_id: + # We've recached the end. + return len(rows), False progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -203,16 +201,16 @@ def reindex_search_txn(txn): txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress ) - return len(rows) + return len(rows), True - result = yield self.runInteraction( + num_rows, finished = yield self.runInteraction( self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn ) - if not result: + if not finished: yield self._end_background_update(self.EVENT_SEARCH_ORDER_UPDATE_NAME) - defer.returnValue(result) + defer.returnValue(num_rows) @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): From 4063fe0283b09a7a300dd41972629607b917e1e2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 Apr 2016 10:35:53 +0100 Subject: [PATCH 13/13] Update port script --- scripts/synapse_port_db | 123 ++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 42 deletions(-) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 253a6ef6c7ce..efd04da2d6ed 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -214,6 +214,10 @@ class Porter(object): self.progress.add_table(table, postgres_size, table_size) + if table == "event_search": + yield self.handle_search_table(postgres_size, table_size, next_chunk) + return + select = ( "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) @@ -232,60 +236,95 @@ class Porter(object): if rows: next_chunk = rows[-1][0] + 1 - if table == "event_search": - # We have to treat event_search differently since it has a - # different structure in the two different databases. - def insert(txn): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, sender, vector)" - " VALUES (?,?,?,?,to_tsvector('english', ?))" - ) + self._convert_rows(table, headers, rows) - rows_dict = [ - dict(zip(headers, row)) - for row in rows - ] - - txn.executemany(sql, [ - ( - row["event_id"], - row["room_id"], - row["key"], - row["sender"], - row["value"], - ) - for row in rows_dict - ]) - - self.postgres_store._simple_update_one_txn( - txn, - table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, - ) - else: - self._convert_rows(table, headers, rows) + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) - def insert(txn): - self.postgres_store.insert_many_txn( - txn, table, headers[1:], rows - ) + self.postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) + + yield self.postgres_store.execute(insert) + + postgres_size += len(rows) + + self.progress.update(table, postgres_size) + else: + return + + @defer.inlineCallbacks + def handle_search_table(self, postgres_size, table_size, next_chunk): + select = ( + "SELECT es.rowid, es.*, e.origin_server_ts, e.stream_ordering" + " FROM event_search as es" + " INNER JOIN events AS e USING (event_id, room_id)" + " WHERE es.rowid >= ?" + " ORDER BY es.rowid LIMIT ?" + ) - self.postgres_store._simple_update_one_txn( - txn, - table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, + while True: + def r(txn): + txn.execute(select, (next_chunk, self.batch_size,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + return headers, rows + + headers, rows = yield self.sqlite_store.runInteraction("select", r) + + if rows: + next_chunk = rows[-1][0] + 1 + + # We have to treat event_search differently since it has a + # different structure in the two different databases. + def insert(txn): + sql = ( + "INSERT INTO event_search (event_id, room_id, key," + " sender, vector, origin_server_ts, stream_ordering)" + " VALUES (?,?,?,?,to_tsvector('english', ?),?,?)" + ) + + rows_dict = [ + dict(zip(headers, row)) + for row in rows + ] + + txn.executemany(sql, [ + ( + row["event_id"], + row["room_id"], + row["key"], + row["sender"], + row["value"], + row["origin_server_ts"], + row["stream_ordering"], ) + for row in rows_dict + ]) + + self.postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": "event_search"}, + updatevalues={"rowid": next_chunk}, + ) yield self.postgres_store.execute(insert) postgres_size += len(rows) - self.progress.update(table, postgres_size) + self.progress.update("event_search", postgres_size) + else: return + def setup_db(self, db_config, database_engine): db_conn = database_engine.module.connect( **{