Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Use the state event amount for userdir import batching, not room count (#4944)
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkowl authored Mar 26, 2019
1 parent 4a125be commit 903f04c
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
1 change: 1 addition & 0 deletions changelog.d/4944.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The user directory has been rewritten to make it faster, with less chance of falling behind on a large server.
28 changes: 20 additions & 8 deletions synapse/storage/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,30 +135,34 @@ def _delete_staging_area(txn):

@defer.inlineCallbacks
def _populate_user_directory_process_rooms(self, progress, batch_size):

"""
Args:
progress (dict)
batch_size (int): Maximum number of state events to process
per cycle.
"""
state = self.hs.get_state_handler()

# If no progress has been recorded yet, delete everything.
if not progress:
yield self.delete_all_from_user_dir()

def _get_next_batch(txn):
# Only fetch 250 rooms, so we don't fetch too many at once, even
# if those 250 rooms have fewer than batch_size state events.
sql = """
SELECT room_id FROM %s
SELECT room_id, events FROM %s
ORDER BY events DESC
LIMIT %s
LIMIT 250
""" % (
TEMP_TABLE + "_rooms",
str(batch_size),
)
txn.execute(sql)
rooms_to_work_on = txn.fetchall()

if not rooms_to_work_on:
return None

rooms_to_work_on = [x[0] for x in rooms_to_work_on]

# Get how many are left to process, so we can give status on how
# far we are in processing
txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
Expand All @@ -180,7 +184,9 @@ def _get_next_batch(txn):
% (len(rooms_to_work_on), progress["remaining"])
)

for room_id in rooms_to_work_on:
processed_event_count = 0

for room_id, event_count in rooms_to_work_on:
is_in_room = yield self.is_host_joined(room_id, self.server_name)

if is_in_room:
Expand Down Expand Up @@ -247,7 +253,13 @@ def _get_next_batch(txn):
progress,
)

defer.returnValue(len(rooms_to_work_on))
processed_event_count += event_count

if processed_event_count > batch_size:
# Don't process any more rooms, we've hit our batch size.
defer.returnValue(processed_event_count)

defer.returnValue(processed_event_count)

@defer.inlineCallbacks
def _populate_user_directory_process_users(self, progress, batch_size):
Expand Down

0 comments on commit 903f04c

Please sign in to comment.