Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #2769 from matrix-org/matthew/hit_the_gin
Browse files Browse the repository at this point in the history
switch back from GIST to GIN indexes
  • Loading branch information
richvdh authored Feb 14, 2018
2 parents c0c9327 + 5978dcc commit d28ec43
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 20 deletions.
19 changes: 19 additions & 0 deletions synapse/storage/background_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,25 @@ def register_background_update_handler(self, update_name, update_handler):
"""
self._background_update_handlers[update_name] = update_handler

def register_noop_background_update(self, update_name):
"""Register a noop handler for a background update.
This is useful when we previously did a background update, but no
longer wish to do the update. In this case the background update should
be removed from the schema delta files, but there may still be some
users who have the background update queued, so this method should
also be called to clear the update.
Args:
update_name (str): Name of update
"""
@defer.inlineCallbacks
def noop_update(progress, batch_size):
yield self._end_background_update(update_name)
defer.returnValue(1)

self.register_background_update_handler(update_name, noop_update)

def register_background_index_update(self, update_name, index_name,
table, columns, where_clause=None,
unique=False,
Expand Down
7 changes: 1 addition & 6 deletions synapse/storage/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ def __init__(self, db_conn, hs):
# we no longer use refresh tokens, but it's possible that some people
# might have a background update queued to build this index. Just
# clear the background update.
@defer.inlineCallbacks
def noop_update(progress, batch_size):
yield self._end_background_update("refresh_tokens_device_index")
defer.returnValue(1)
self.register_background_update_handler(
"refresh_tokens_device_index", noop_update)
self.register_noop_background_update("refresh_tokens_device_index")

@defer.inlineCallbacks
def add_access_token_to_user(self, user_id, token, device_id=None):
Expand Down
6 changes: 4 additions & 2 deletions synapse/storage/schema/delta/38/postgres_fts_gist.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@
* limitations under the License.
*/

INSERT into background_updates (update_name, progress_json)
VALUES ('event_search_postgres_gist', '{}');
-- We no longer do this given we back it out again in schema 47

-- INSERT into background_updates (update_name, progress_json)
-- VALUES ('event_search_postgres_gist', '{}');
17 changes: 17 additions & 0 deletions synapse/storage/schema/delta/47/postgres_fts_gin.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/* Copyright 2018 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

INSERT into background_updates (update_name, progress_json)
VALUES ('event_search_postgres_gin', '{}');
81 changes: 69 additions & 12 deletions synapse/storage/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class SearchStore(BackgroundUpdateStore):
EVENT_SEARCH_UPDATE_NAME = "event_search"
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"

def __init__(self, db_conn, hs):
super(SearchStore, self).__init__(db_conn, hs)
Expand All @@ -48,9 +49,19 @@ def __init__(self, db_conn, hs):
self.EVENT_SEARCH_ORDER_UPDATE_NAME,
self._background_reindex_search_order
)
self.register_background_update_handler(

# we used to have a background update to turn the GIN index into a
# GIST one; we no longer do that (obviously) because we actually want
# a GIN index. However, it's possible that some people might still have
# the background update queued, so we register a handler to clear the
# background update.
self.register_noop_background_update(
self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
self._background_reindex_gist_search
)

self.register_background_update_handler(
self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME,
self._background_reindex_gin_search
)

@defer.inlineCallbacks
Expand Down Expand Up @@ -151,25 +162,48 @@ def reindex_search_txn(txn):
defer.returnValue(result)

@defer.inlineCallbacks
def _background_reindex_gist_search(self, progress, batch_size):
def _background_reindex_gin_search(self, progress, batch_size):
"""This handles old synapses which used GIST indexes, if any;
converting them back to be GIN as per the actual schema.
"""

def create_index(conn):
conn.rollback()
conn.set_session(autocommit=True)
c = conn.cursor()

c.execute(
"CREATE INDEX CONCURRENTLY event_search_fts_idx_gist"
" ON event_search USING GIST (vector)"
)
# we have to set autocommit, because postgres refuses to
# CREATE INDEX CONCURRENTLY without it.
conn.set_session(autocommit=True)

c.execute("DROP INDEX event_search_fts_idx")
try:
c = conn.cursor()

conn.set_session(autocommit=False)
# if we skipped the conversion to GIST, we may already/still
# have an event_search_fts_idx; unfortunately postgres 9.4
# doesn't support CREATE INDEX IF EXISTS so we just catch the
# exception and ignore it.
import psycopg2
try:
c.execute(
"CREATE INDEX CONCURRENTLY event_search_fts_idx"
" ON event_search USING GIN (vector)"
)
except psycopg2.ProgrammingError as e:
logger.warn(
"Ignoring error %r when trying to switch from GIST to GIN",
e
)

# we should now be able to delete the GIST index.
c.execute(
"DROP INDEX IF EXISTS event_search_fts_idx_gist"
)
finally:
conn.set_session(autocommit=False)

if isinstance(self.database_engine, PostgresEngine):
yield self.runWithConnection(create_index)

yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME)
yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME)
defer.returnValue(1)

@defer.inlineCallbacks
Expand Down Expand Up @@ -289,7 +323,30 @@ def store_search_entries_txn(self, txn, entries):
entry.stream_ordering, entry.origin_server_ts,
) for entry in entries)

# inserts to a GIN index are normally batched up into a pending
# list, and then all committed together once the list gets to a
# certain size. The trouble with that is that postgres (pre-9.5)
# uses work_mem to determine the length of the list, and work_mem
# is typically very large.
#
# We therefore reduce work_mem while we do the insert.
#
# (postgres 9.5 uses the separate gin_pending_list_limit setting,
# so doesn't suffer the same problem, but changing work_mem will
# be harmless)
#
# Note that we don't need to worry about restoring it on
# exception, because exceptions will cause the transaction to be
# rolled back, including the effects of the SET command.
#
# Also: we use SET rather than SET LOCAL because there's lots of
# other stuff going on in this transaction, which want to have the
# normal work_mem setting.

txn.execute("SET work_mem='256kB'")
txn.executemany(sql, args)
txn.execute("RESET work_mem")

elif isinstance(self.database_engine, Sqlite3Engine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, value)"
Expand Down

0 comments on commit d28ec43

Please sign in to comment.