From 933daec9fe09db041156553608c529a64dba9586 Mon Sep 17 00:00:00 2001 From: pacien Date: Fri, 30 Jun 2023 22:45:09 +0200 Subject: [PATCH] devices: use combined ANY clause for faster cleanup Old device entries for the same user were being removed in individual SQL commands, making the batch take way longer than necessary. This combines the commands into a single one with a IN/ANY clause. Example of log entry before the change, regularly observed with "log_min_duration_statement = 10000" in PostgreSQL's config: LOG: duration: 42538.282 ms statement: DELETE FROM device_lists_stream WHERE user_id = '@someone' AND device_id = 'someid1' AND stream_id < 123456789 ; DELETE FROM device_lists_stream WHERE user_id = '@someone' AND device_id = 'someid2' AND stream_id < 123456789 ; [repeated for each device ID of that user, potentially a lot...] With the patch applied on my instance for the past couple of days, I no longer notice overly long statements of that particular kind. --- synapse/storage/databases/main/devices.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index f677d048aafb..d9df437e518a 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1950,12 +1950,16 @@ def _add_device_change_to_stream_txn( # Delete older entries in the table, as we really only care about # when the latest change happened. - txn.execute_batch( - """ + cleanup_obsolete_stmt = """ DELETE FROM device_lists_stream - WHERE user_id = ? AND device_id = ? AND stream_id < ? - """, - [(user_id, device_id, min_stream_id) for device_id in device_ids], + WHERE user_id = ? AND stream_id < ? AND %s + """ + device_ids_clause, device_ids_args = make_in_list_sql_clause( + txn.database_engine, "device_id", device_ids + ) + txn.execute( + cleanup_obsolete_stmt % (device_ids_clause,), + [user_id, min_stream_id] + device_ids_args, ) self.db_pool.simple_insert_many_txn(