From d26ac746d42f54af4dffdc1c4d7da05318cd9584 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Jul 2024 18:40:23 -0500 Subject: [PATCH 001/142] Start thinking about schemas --- .../delta/87/01_sliding_sync_memberships.sql | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql new file mode 100644 index 00000000000..cd211bad922 --- /dev/null +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -0,0 +1,35 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2024 New Vector, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( + room_id TEXT NOT NULL REFERENCES rooms(room_id), + room_type TEXT, + room_name TEXT, + is_encrypted BOOLEAN, + stream_ordering: BIGINT, + bump_stamp: BIGINT, +); + +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_joined_rooms_room_id ON sliding_sync_joined_rooms(room_id); + +CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( + membership_event_id TEXT NOT NULL REFERENCES events(event_id), + room_id TEXT NOT NULL REFERENCES rooms(room_id), + room_type TEXT, + room_name TEXT, + is_encrypted BOOLEAN, + stream_ordering: BIGINT, + bump_stamp: BIGINT, +); + +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_membership_event_id ON sliding_sync_non_join_memberships(membership_event_id); From e7e9cb289d3cd0e877a0bd25e872b0992feaddec Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Jul 2024 18:43:50 -0500 Subject: [PATCH 002/142] Add changelog --- changelog.d/17512.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17512.misc diff --git a/changelog.d/17512.misc b/changelog.d/17512.misc new file mode 100644 index 00000000000..756918e2b21 --- /dev/null +++ b/changelog.d/17512.misc @@ -0,0 +1 @@ +Pre-populate room data used in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint for quick filtering/sorting. From 8392d6ac3b7ff4dd924758234f0d7eeaecc3ce8f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Jul 2024 19:00:55 -0500 Subject: [PATCH 003/142] Use foreign keys --- .../schema/main/delta/87/01_sliding_sync_memberships.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index cd211bad922..646d3b7128b 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -12,7 +12,7 @@ -- . 
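-- A rough illustration (not part of this series) of the read pattern these tables are
-- meant to serve: per the changelog above, the goal is to pre-populate room data so the
-- Sliding Sync room list can be filtered and sorted cheaply. Assuming the columns from
-- the initial schema, such a read looks roughly like:
--
--   SELECT room_id, room_type, room_name, is_encrypted, bump_stamp
--     FROM sliding_sync_joined_rooms
--    ORDER BY bump_stamp DESC;
--
-- The exact query Synapse will issue is not defined anywhere in this series.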
CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( - room_id TEXT NOT NULL REFERENCES rooms(room_id), + FOREIGN KEY(room_id) REFERENCES rooms(room_id), room_type TEXT, room_name TEXT, is_encrypted BOOLEAN, @@ -23,8 +23,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_joined_rooms_room_id ON sliding_sync_joined_rooms(room_id); CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( - membership_event_id TEXT NOT NULL REFERENCES events(event_id), - room_id TEXT NOT NULL REFERENCES rooms(room_id), + FOREIGN KEY(membership_event_id) REFERENCES events(event_id), + FOREIGN KEY(room_id) REFERENCES rooms(room_id), room_type TEXT, room_name TEXT, is_encrypted BOOLEAN, From 2b5f07d714c006ccb342a038b18dccfc9b17fe05 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 5 Aug 2024 22:34:21 -0500 Subject: [PATCH 004/142] Start of updating `sliding_sync_joined_rooms` --- synapse/storage/databases/main/events.py | 99 ++++++++++++++++++- .../delta/87/01_sliding_sync_memberships.sql | 7 +- 2 files changed, 99 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1f7acdb8592..1ce75781600 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -34,6 +34,7 @@ Optional, Set, Tuple, + Union, cast, ) @@ -1163,6 +1164,7 @@ def _update_current_state_txn( for ev_type, state_key in itertools.chain(to_delete, to_insert) if ev_type == EventTypes.Member } + members_to_cache_bust = members_changed.copy() if delta_state.no_longer_in_room: # Server is no longer in the room so we delete the room from @@ -1182,16 +1184,22 @@ def _update_current_state_txn( """ txn.execute(sql, (stream_id, self._instance_name, room_id)) + # Grab the list of users before we clear out the current state + users_in_room = self.store.get_users_in_room_txn(txn, room_id) # We also want to invalidate the membership caches for users # that were in the room. - users_in_room = self.store.get_users_in_room_txn(txn, room_id) - members_changed.update(users_in_room) + members_to_cache_bust.update(users_in_room) self.db_pool.simple_delete_txn( txn, table="current_state_events", keyvalues={"room_id": room_id}, ) + self.db_pool.simple_delete_txn( + txn, + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + ) else: # We're still in the room, so we update the current state as normal. 
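A minimal sketch (not part of the patch itself) of what the two `simple_delete_txn` calls
above amount to when the server has fully left the room, with the room ID supplied as a
bound parameter; the new `sliding_sync_joined_rooms` rows are kept in lock-step with
`current_state_events`:

    DELETE FROM current_state_events      WHERE room_id = ?;
    DELETE FROM sliding_sync_joined_rooms WHERE room_id = ?;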
@@ -1260,6 +1268,81 @@ def _update_current_state_txn( ], ) + # Handle updating the `sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + event_ids_to_fetch: List[str] = [] + create_event_id = None + room_encryption_event_id = None + room_name_event_id = None + for state_key, event_id in to_insert.items(): + if state_key[0] == EventTypes.Create: + create_event_id = event_id + event_ids_to_fetch.append(event_id) + sliding_sync_joined_rooms_insert_map["room_type"] = None + elif state_key[0] == EventTypes.RoomEncryption: + room_encryption_event_id = event_id + event_ids_to_fetch.append(event_id) + sliding_sync_joined_rooms_insert_map["is_encrypted"] = None + elif state_key[0] == EventTypes.Name: + room_name_event_id = event_id + event_ids_to_fetch.append(event_id) + sliding_sync_joined_rooms_insert_map["room_name"] = None + + # Fetch the events from the database + event_json_rows = cast( + List[Tuple[str, str]], + self.db_pool.simple_select_many_txn( + txn, + table="event_json", + column="event_id", + iterable=event_ids_to_fetch, + retcols=["event_id", "json"], + keyvalues={}, + ), + ) + # Parse the raw event JSON + for event_id, json in event_json_rows: + event_json = db_to_json(json) + + if event_id == create_event_id: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_joined_rooms_insert_map["room_type"] = room_type + elif event_id == room_encryption_event_id: + is_encrypted = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + sliding_sync_joined_rooms_insert_map["is_encrypted"] = is_encrypted + elif event_id == room_name_event_id: + room_name = event_json.get("content", {}).get("name") + sliding_sync_joined_rooms_insert_map["room_name"] = room_name + else: + raise AssertionError( + f"Unexpected event_id (we should not be fetching extra events): {event_id}" + ) + + # Update the `sliding_sync_joined_rooms` table + insert_keys, insert_values = sliding_sync_joined_rooms_insert_map.items() + if len(insert_keys) > 0: + # TODO: Should we add `bump_stamp` on insert? + txn.execute( + f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, stream_ordering, {", ".join(insert_keys)}) + VALUES ( + ?, + (SELECT stream_ordering FROM events WHERE event_id = ?) + {", ".join("?" for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO UPDATE SET + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """, + ) + # We now update `local_current_membership`. We do this regardless # of whether we're still in the room or not to handle the case where # e.g. we just got banned (where we need to record that fact here). @@ -1296,6 +1379,12 @@ def _update_current_state_txn( ], ) + # We now update `sliding_sync_non_join_memberships`. We do this regardless of + # whether the server is still in the room or not because we still want a row if + # we just left/kicked or got banned from the room. + # + # TODO + txn.call_after( self.store._curr_state_delta_stream_cache.entity_has_changed, room_id, @@ -1303,12 +1392,14 @@ def _update_current_state_txn( ) # Invalidate the various caches - self.store._invalidate_state_caches_and_stream(txn, room_id, members_changed) + self.store._invalidate_state_caches_and_stream( + txn, room_id, members_to_cache_bust + ) # Check if any of the remote membership changes requires us to # unsubscribe from their device lists. 
self.store.handle_potentially_left_users_txn( - txn, {m for m in members_changed if not self.hs.is_mine_id(m)} + txn, {m for m in members_to_cache_bust if not self.hs.is_mine_id(m)} ) def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 646d3b7128b..be1b27fdfd3 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -11,6 +11,7 @@ -- See the GNU Affero General Public License for more details: -- . +-- Kept in sync with `current_state_events` CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( FOREIGN KEY(room_id) REFERENCES rooms(room_id), room_type TEXT, @@ -18,18 +19,18 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( is_encrypted BOOLEAN, stream_ordering: BIGINT, bump_stamp: BIGINT, + PRIMARY KEY (room_id) ); -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_joined_rooms_room_id ON sliding_sync_joined_rooms(room_id); CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( - FOREIGN KEY(membership_event_id) REFERENCES events(event_id), FOREIGN KEY(room_id) REFERENCES rooms(room_id), + FOREIGN KEY(membership_event_id) REFERENCES events(event_id), room_type TEXT, room_name TEXT, is_encrypted BOOLEAN, stream_ordering: BIGINT, bump_stamp: BIGINT, + PRIMARY KEY (room_id, membership_event_id) ); -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_membership_event_id ON sliding_sync_non_join_memberships(membership_event_id); From 1a251d5211f1217049661c7bcae06d99c5103d7f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 6 Aug 2024 15:18:34 -0500 Subject: [PATCH 005/142] Fill in `sliding_sync_non_join_memberships` when current state changes --- synapse/storage/databases/main/events.py | 107 ++++++++++++++++-- .../delta/87/01_sliding_sync_memberships.sql | 26 ++++- 2 files changed, 120 insertions(+), 13 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1ce75781600..3a0eebe4114 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -53,6 +53,7 @@ DatabasePool, LoggingDatabaseConnection, LoggingTransaction, + make_tuple_in_list_sql_clause, ) from synapse.storage.databases.main.event_federation import EventFederationStore from synapse.storage.databases.main.events_worker import EventCacheEntry @@ -1159,12 +1160,11 @@ def _update_current_state_txn( # We find out which membership events we may have deleted # and which we have added, then we invalidate the caches for all # those users. 
- members_changed = { + members_to_cache_bust = { state_key for ev_type, state_key in itertools.chain(to_delete, to_insert) if ev_type == EventTypes.Member } - members_to_cache_bust = members_changed.copy() if delta_state.no_longer_in_room: # Server is no longer in the room so we delete the room from @@ -1317,7 +1317,9 @@ def _update_current_state_txn( ) sliding_sync_joined_rooms_insert_map["is_encrypted"] = is_encrypted elif event_id == room_name_event_id: - room_name = event_json.get("content", {}).get("name") + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) sliding_sync_joined_rooms_insert_map["room_name"] = room_name else: raise AssertionError( @@ -1325,13 +1327,14 @@ def _update_current_state_txn( ) # Update the `sliding_sync_joined_rooms` table - insert_keys, insert_values = sliding_sync_joined_rooms_insert_map.items() + insert_keys = sliding_sync_joined_rooms_insert_map.keys() + insert_values = sliding_sync_joined_rooms_insert_map.values() if len(insert_keys) > 0: # TODO: Should we add `bump_stamp` on insert? txn.execute( f""" INSERT INTO sliding_sync_joined_rooms - (room_id, stream_ordering, {", ".join(insert_keys)}) + (room_id, event_stream_ordering, {", ".join(insert_keys)}) VALUES ( ?, (SELECT stream_ordering FROM events WHERE event_id = ?) @@ -1381,9 +1384,97 @@ def _update_current_state_txn( # We now update `sliding_sync_non_join_memberships`. We do this regardless of # whether the server is still in the room or not because we still want a row if - # we just left/kicked or got banned from the room. - # - # TODO + # a local user was just left/kicked or got banned from the room. + if to_insert: + membership_event_ids: List[str] = [] + for state_key, event_id in to_insert.items(): + if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): + membership_event_ids.append(event_id) + + # Fetch the events from the database + ( + event_type_and_state_key_in_list_clause, + event_type_and_state_key_args, + ) = make_tuple_in_list_sql_clause( + self.database_engine, + ("type", "state_key"), + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + (EventTypes.Name, ""), + ], + ) + txn.execute( + f""" + SELECT event_id, type, state_key, json + FROM current_state_events + INNER JOIN event_json USING (event_id) + WHERE + room_id = ? 
+ AND {event_type_and_state_key_in_list_clause} + """, + [room_id] + event_type_and_state_key_args, + ) + + # Parse the raw event JSON + sliding_sync_non_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + for row in txn: + event_id, event_type, state_key, json = row + event_json = db_to_json(json) + + if event_type == EventTypes.Create: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_non_joined_rooms_insert_map["room_type"] = room_type + elif event_type == EventTypes.RoomEncryption: + is_encrypted = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( + is_encrypted + ) + elif event_type == EventTypes.Name: + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) + sliding_sync_non_joined_rooms_insert_map["room_name"] = room_name + else: + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + ) + + # Update the `sliding_sync_non_join_memberships` table + insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() + insert_values = sliding_sync_non_joined_rooms_insert_map.values() + # We `DO NOTHING` on conflict because if the row is already in the database, + # we just assume that it was already processed (values should be the same anyways). + txn.execute_batch( + f""" + INSERT INTO sliding_sync_non_join_memberships + (room_id, membership_event_id, user_id, membership, event_stream_ordering, {", ".join(insert_keys)}) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + {", ".join("?" for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO NOTHING + """, + [ + ( + room_id, + membership_event_id, + state_key[1], + membership_event_id, + membership_event_id, + ) + for membership_event_id in membership_event_ids + ], + ) txn.call_after( self.store._curr_state_delta_stream_cache.entity_has_changed, diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index be1b27fdfd3..efbc5ebd1e1 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -11,26 +11,42 @@ -- See the GNU Affero General Public License for more details: -- . --- Kept in sync with `current_state_events` +-- We store the join memberships in a separate table from +-- `sliding_sync_non_join_memberships` because the information can be shared across +-- everyone who is joined. +-- +-- This table is kept in sync with `current_state_events` CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( FOREIGN KEY(room_id) REFERENCES rooms(room_id), + -- The `stream_ordering` of the latest event in the room + event_stream_ordering BIGINT REFERENCES events(stream_ordering) + -- The `stream_ordering` of the last event according to the `bump_event_types` + bump_stamp: BIGINT, + -- `m.room.create` -> `content.type` room_type TEXT, + -- `m.room.name` -> `content.name` room_name TEXT, + -- `m.room.encryption` -> `content.algorithm` is_encrypted BOOLEAN, - stream_ordering: BIGINT, - bump_stamp: BIGINT, PRIMARY KEY (room_id) ); +-- We don't include `bump_stamp` here because we can just use the `stream_ordering` from +-- the membership event itself as the `bump_stamp`. 
CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( FOREIGN KEY(room_id) REFERENCES rooms(room_id), FOREIGN KEY(membership_event_id) REFERENCES events(event_id), + user_id TEXT NOT NULL, + membership TEXT NOT NULL, + -- `stream_ordering` of the `membership_event_id` + event_stream_ordering BIGINT REFERENCES events(stream_ordering) + -- `m.room.create` -> `content.type` room_type TEXT, + -- `m.room.name` -> `content.name` room_name TEXT, + -- `m.room.encryption` -> `content.algorithm` is_encrypted BOOLEAN, - stream_ordering: BIGINT, - bump_stamp: BIGINT, PRIMARY KEY (room_id, membership_event_id) ); From f96d0c36a3b07cc91d483c81cdf2bbeb0b407d56 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 6 Aug 2024 15:30:51 -0500 Subject: [PATCH 006/142] Special treatment for boolean columns See https://github.com/element-hq/synapse/blob/1dfa59b238cee0dc62163588cc9481896c288979/docs/development/database_schema.md#boolean-columns --- synapse/_scripts/synapse_port_db.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 5c6db8118fc..95c6783905a 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -129,6 +129,8 @@ "remote_media_cache": ["authenticated"], "room_stats_state": ["is_federatable"], "rooms": ["is_public", "has_auth_chain_index"], + "sliding_sync_joined_rooms": ["is_encrypted"], + "sliding_sync_non_join_memberships": ["is_encrypted"], "users": ["shadow_banned", "approved", "locked", "suspended"], "un_partial_stated_event_stream": ["rejection_status_changed"], "users_who_share_rooms": ["share_private"], From 2f3bd272840914d1aa6d88e9ec250bc1d95ceafd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 6 Aug 2024 16:50:14 -0500 Subject: [PATCH 007/142] Test is running --- synapse/storage/databases/main/events.py | 36 +++++++++++-------- synapse/storage/schema/__init__.py | 5 ++- .../delta/87/01_sliding_sync_memberships.sql | 21 ++++++----- tests/storage/test_events.py | 31 ++++++++++++++++ 4 files changed, 68 insertions(+), 25 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 3a0eebe4114..ea382b0b300 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1330,20 +1330,20 @@ def _update_current_state_txn( insert_keys = sliding_sync_joined_rooms_insert_map.keys() insert_values = sliding_sync_joined_rooms_insert_map.values() if len(insert_keys) > 0: - # TODO: Should we add `bump_stamp` on insert? + # TODO: Should we add `event_stream_ordering`, `bump_stamp` on insert? txn.execute( f""" INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {", ".join(insert_keys)}) + (room_id, {", ".join(insert_keys)}) VALUES ( ?, - (SELECT stream_ordering FROM events WHERE event_id = ?) {", ".join("?" for _ in insert_values)} ) ON CONFLICT (room_id) DO UPDATE SET {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} """, + [room_id] + list(insert_values), ) # We now update `local_current_membership`. We do this regardless @@ -1386,12 +1386,15 @@ def _update_current_state_txn( # whether the server is still in the room or not because we still want a row if # a local user was just left/kicked or got banned from the room. 
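        # (For illustration: e.g. when a local user is banned, their row keeps
        # membership "ban" together with the room name/type/encryption as of that
        # event, so sliding sync can list the room for them without having to
        # consult the room's current state again.)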
if to_insert: - membership_event_ids: List[str] = [] + membership_event_id_to_user_id_map: Dict[str, str] = {} for state_key, event_id in to_insert.items(): if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): - membership_event_ids.append(event_id) + membership_event_id_to_user_id_map[event_id] = state_key[1] # Fetch the events from the database + # + # TODO: We should gather this data before we delete the + # `current_state_events` in a `no_longer_in_room` situation. ( event_type_and_state_key_in_list_clause, event_type_and_state_key_args, @@ -1406,11 +1409,11 @@ def _update_current_state_txn( ) txn.execute( f""" - SELECT event_id, type, state_key, json - FROM current_state_events - INNER JOIN event_json USING (event_id) + SELECT c.event_id, c.type, c.state_key, j.json + FROM current_state_events AS c + INNER JOIN event_json AS j USING (event_id) WHERE - room_id = ? + c.room_id = ? AND {event_type_and_state_key_in_list_clause} """, [room_id] + event_type_and_state_key_args, @@ -1451,6 +1454,8 @@ def _update_current_state_txn( insert_values = sliding_sync_non_joined_rooms_insert_map.values() # We `DO NOTHING` on conflict because if the row is already in the database, # we just assume that it was already processed (values should be the same anyways). + # + # TODO: Only do this for non-join membership txn.execute_batch( f""" INSERT INTO sliding_sync_non_join_memberships @@ -1458,21 +1463,22 @@ def _update_current_state_txn( VALUES ( ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) + (SELECT stream_ordering FROM events WHERE event_id = ?), {", ".join("?" for _ in insert_values)} ) - ON CONFLICT (room_id) + ON CONFLICT (room_id, membership_event_id) DO NOTHING """, [ - ( + [ room_id, membership_event_id, - state_key[1], + user_id, membership_event_id, membership_event_id, - ) - for membership_event_id in membership_event_ids + ] + + list(insert_values) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ], ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 581d00346bf..82790024399 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -19,7 +19,7 @@ # # -SCHEMA_VERSION = 86 # remember to update the list below when updating +SCHEMA_VERSION = 87 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -142,6 +142,9 @@ Changes in SCHEMA_VERSION = 86 - Add a column `authenticated` to the tables `local_media_repository` and `remote_media_cache` + +Changes in SCHEMA_VERSION = 87 + - TODO """ diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index efbc5ebd1e1..d496a5a91a0 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -17,11 +17,11 @@ -- -- This table is kept in sync with `current_state_events` CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( - FOREIGN KEY(room_id) REFERENCES rooms(room_id), + room_id TEXT NOT NULL REFERENCES rooms(room_id), -- The `stream_ordering` of the latest event in the room - event_stream_ordering BIGINT REFERENCES events(stream_ordering) + event_stream_ordering BIGINT REFERENCES events(stream_ordering), 
-- The `stream_ordering` of the last event according to the `bump_event_types` - bump_stamp: BIGINT, + bump_stamp BIGINT, -- `m.room.create` -> `content.type` room_type TEXT, -- `m.room.name` -> `content.name` @@ -35,17 +35,20 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( -- We don't include `bump_stamp` here because we can just use the `stream_ordering` from -- the membership event itself as the `bump_stamp`. CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( - FOREIGN KEY(room_id) REFERENCES rooms(room_id), - FOREIGN KEY(membership_event_id) REFERENCES events(event_id), + room_id TEXT NOT NULL REFERENCES rooms(room_id), + membership_event_id TEXT NOT NULL REFERENCES events(event_id), user_id TEXT NOT NULL, membership TEXT NOT NULL, -- `stream_ordering` of the `membership_event_id` - event_stream_ordering BIGINT REFERENCES events(stream_ordering) - -- `m.room.create` -> `content.type` + event_stream_ordering BIGINT REFERENCES events(stream_ordering), + -- `m.room.create` -> `content.type` (according to the current state at the time of + -- the membership) room_type TEXT, - -- `m.room.name` -> `content.name` + -- `m.room.name` -> `content.name` (according to the current state at the time of + -- the membership) room_name TEXT, - -- `m.room.encryption` -> `content.algorithm` + -- `m.room.encryption` -> `content.algorithm` (according to the current state at the + -- time of the membership) is_encrypted BOOLEAN, PRIMARY KEY (room_id, membership_event_id) ); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 0a7c4c94214..bc37acfea71 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -19,6 +19,7 @@ # # +import logging from typing import List, Optional from twisted.test.proto_helpers import MemoryReactor @@ -35,6 +36,8 @@ from tests.unittest import HomeserverTestCase +logger = logging.getLogger(__name__) + class ExtremPruneTestCase(HomeserverTestCase): servlets = [ @@ -481,3 +484,31 @@ def test_room_remote_user_cache_invalidated(self) -> None: users = self.get_success(self.store.get_users_in_room(room_id)) self.assertEqual(users, []) + + +class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): + """ + Tests to make sure the + `sliding_sync_joined_rooms`/`sliding_sync_non_join_memberships` database tables are + populated correctly. 
+ """ + + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def test_rooms_invite_shared_history_initial_sync(self) -> None: + """ + TODO + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # user1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) From cb335805d47696b9bef64b3d6af73f7cebfd9823 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 10:46:34 -0500 Subject: [PATCH 008/142] Server left room test --- synapse/storage/databases/main/events.py | 7 +++++++ tests/storage/test_events.py | 26 ++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ea382b0b300..ef73740ef8a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1155,6 +1155,13 @@ def _update_current_state_txn( to_delete = delta_state.to_delete to_insert = delta_state.to_insert + logger.info( + "asdf _update_current_state_txn no_longer_in_room=%s to_insert=%s to_delete=%s", + delta_state.no_longer_in_room, + to_insert, + to_delete, + ) + # Figure out the changes of membership to invalidate the # `get_rooms_for_user` cache. # We find out which membership events we may have deleted diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index bc37acfea71..39f55022dda 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -499,7 +499,7 @@ class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): room.register_servlets, ] - def test_rooms_invite_shared_history_initial_sync(self) -> None: + def test_TODO(self) -> None: """ TODO """ @@ -510,5 +510,27 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - # user1 joins the room + # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) + + def test_server_left_room(self) -> None: + """ + TODO + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # User2 leaves the room + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + # User1 leaves the room + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + # TODO: Server left room test From 87d95615d4fdc199f9196c19eb519781e78739ca Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 16:37:17 -0500 Subject: [PATCH 009/142] Change to updating the latest membership in the room --- synapse/storage/databases/main/events.py | 196 ++++++++++-------- .../delta/87/01_sliding_sync_memberships.sql | 6 +- 2 files changed, 113 insertions(+), 89 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ef73740ef8a..6f9ab3c31fb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -42,7 +42,12 @@ from prometheus_client import Counter import synapse.metrics -from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from 
synapse.api.constants import ( + EventContentFields, + EventTypes, + RelationTypes, + Membership, +) from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event @@ -1392,102 +1397,119 @@ def _update_current_state_txn( # We now update `sliding_sync_non_join_memberships`. We do this regardless of # whether the server is still in the room or not because we still want a row if # a local user was just left/kicked or got banned from the room. + if to_delete: + txn.execute_batch( + "DELETE FROM sliding_sync_non_join_memberships" + " WHERE room_id = ? AND user_id = ?", + ( + (room_id, state_key) + for event_type, state_key in to_delete + if event_type == EventTypes.Member and self.is_mine_id(state_key) + ), + ) + if to_insert: membership_event_id_to_user_id_map: Dict[str, str] = {} for state_key, event_id in to_insert.items(): if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - # Fetch the events from the database - # - # TODO: We should gather this data before we delete the - # `current_state_events` in a `no_longer_in_room` situation. - ( - event_type_and_state_key_in_list_clause, - event_type_and_state_key_args, - ) = make_tuple_in_list_sql_clause( - self.database_engine, - ("type", "state_key"), - [ - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - (EventTypes.Name, ""), - ], - ) - txn.execute( - f""" - SELECT c.event_id, c.type, c.state_key, j.json - FROM current_state_events AS c - INNER JOIN event_json AS j USING (event_id) - WHERE - c.room_id = ? - AND {event_type_and_state_key_in_list_clause} - """, - [room_id] + event_type_and_state_key_args, - ) + if len(membership_event_id_to_user_id_map) > 0: + # Fetch the events from the database + # + # TODO: We should gather this data before we delete the + # `current_state_events` in a `no_longer_in_room` situation. + ( + event_type_and_state_key_in_list_clause, + event_type_and_state_key_args, + ) = make_tuple_in_list_sql_clause( + self.database_engine, + ("type", "state_key"), + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + (EventTypes.Name, ""), + ], + ) + txn.execute( + f""" + SELECT c.event_id, c.type, c.state_key, j.json + FROM current_state_events AS c + INNER JOIN event_json AS j USING (event_id) + WHERE + c.room_id = ? 
+ AND {event_type_and_state_key_in_list_clause} + """, + [room_id] + event_type_and_state_key_args, + ) - # Parse the raw event JSON - sliding_sync_non_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} - for row in txn: - event_id, event_type, state_key, json = row - event_json = db_to_json(json) + # Parse the raw event JSON + sliding_sync_non_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + for row in txn: + event_id, event_type, state_key, json = row + event_json = db_to_json(json) + + if event_type == EventTypes.Create: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_non_joined_rooms_insert_map["room_type"] = ( + room_type + ) + elif event_type == EventTypes.RoomEncryption: + is_encrypted = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( + is_encrypted + ) + elif event_type == EventTypes.Name: + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) + sliding_sync_non_joined_rooms_insert_map["room_name"] = ( + room_name + ) + else: + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + ) - if event_type == EventTypes.Create: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_non_joined_rooms_insert_map["room_type"] = room_type - elif event_type == EventTypes.RoomEncryption: - is_encrypted = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( - is_encrypted - ) - elif event_type == EventTypes.Name: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_non_joined_rooms_insert_map["room_name"] = room_name - else: - raise AssertionError( - f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + # Update the `sliding_sync_non_join_memberships` table + insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() + insert_values = sliding_sync_non_joined_rooms_insert_map.values() + # TODO: Only do this for non-join membership + txn.execute_batch( + f""" + INSERT INTO sliding_sync_non_join_memberships + (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?), + {", ".join("?" for _ in insert_values)} ) - - # Update the `sliding_sync_non_join_memberships` table - insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() - insert_values = sliding_sync_non_joined_rooms_insert_map.values() - # We `DO NOTHING` on conflict because if the row is already in the database, - # we just assume that it was already processed (values should be the same anyways). - # - # TODO: Only do this for non-join membership - txn.execute_batch( - f""" - INSERT INTO sliding_sync_non_join_memberships - (room_id, membership_event_id, user_id, membership, event_stream_ordering, {", ".join(insert_keys)}) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" 
for _ in insert_values)} - ) - ON CONFLICT (room_id, membership_event_id) - DO NOTHING - """, - [ + ON CONFLICT (room_id, user_id) + DO UPDATE SET + membership_event_id = EXCLUDED.membership_event_id, + membership = EXCLUDED.membership, + event_stream_ordering = EXCLUDED.event_stream_ordering, + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """, [ - room_id, - membership_event_id, - user_id, - membership_event_id, - membership_event_id, - ] - + list(insert_values) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ], - ) + [ + room_id, + user_id, + membership_event_id, + membership_event_id, + membership_event_id, + ] + + list(insert_values) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + ], + ) txn.call_after( self.store._curr_state_delta_stream_cache.entity_has_changed, diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index d496a5a91a0..1afdecd8e9f 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -36,8 +36,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( -- the membership event itself as the `bump_stamp`. CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( room_id TEXT NOT NULL REFERENCES rooms(room_id), - membership_event_id TEXT NOT NULL REFERENCES events(event_id), user_id TEXT NOT NULL, + membership_event_id TEXT NOT NULL REFERENCES events(event_id), membership TEXT NOT NULL, -- `stream_ordering` of the `membership_event_id` event_stream_ordering BIGINT REFERENCES events(stream_ordering), @@ -50,6 +50,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( -- `m.room.encryption` -> `content.algorithm` (according to the current state at the -- time of the membership) is_encrypted BOOLEAN, - PRIMARY KEY (room_id, membership_event_id) + PRIMARY KEY (room_id, user_id) ); +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_event_stream_ordering ON sliding_sync_non_join_memberships(event_stream_ordering); +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_membership_event_id ON sliding_sync_non_join_memberships(membership_event_id); From 61cea4e9b7f3a6f0c1f6748a020fa4089efea772 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 18:07:53 -0500 Subject: [PATCH 010/142] Closer to right --- synapse/storage/databases/main/events.py | 23 ++-- tests/storage/test_events.py | 144 ++++++++++++++++++++++- 2 files changed, 156 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 6f9ab3c31fb..913b6f2b876 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1324,9 +1324,10 @@ def _update_current_state_txn( ) sliding_sync_joined_rooms_insert_map["room_type"] = room_type elif event_id == room_encryption_event_id: - is_encrypted = event_json.get("content", {}).get( + encryption_algorithm = event_json.get("content", {}).get( EventContentFields.ENCRYPTION_ALGORITHM ) + is_encrypted = encryption_algorithm is not None sliding_sync_joined_rooms_insert_map["is_encrypted"] = is_encrypted elif event_id == room_name_event_id: room_name = event_json.get("content", {}).get( @@ -1459,9 +1460,10 @@ def _update_current_state_txn( room_type ) elif event_type == EventTypes.RoomEncryption: - is_encrypted = 
event_json.get("content", {}).get( + encryption_algorithm = event_json.get("content", {}).get( EventContentFields.ENCRYPTION_ALGORITHM ) + is_encrypted = encryption_algorithm is not None sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( is_encrypted ) @@ -1483,14 +1485,18 @@ def _update_current_state_txn( # TODO: Only do this for non-join membership txn.execute_batch( f""" + WITH data_table (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) AS ( + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?), + {", ".join("?" for _ in insert_values)} + ) + ) INSERT INTO sliding_sync_non_join_memberships (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in insert_values)} - ) + SELECT * FROM data_table + WHERE membership != ? ON CONFLICT (room_id, user_id) DO UPDATE SET membership_event_id = EXCLUDED.membership_event_id, @@ -1505,6 +1511,7 @@ def _update_current_state_txn( membership_event_id, membership_event_id, membership_event_id, + Membership.JOIN, ] + list(insert_values) for membership_event_id, user_id in membership_event_id_to_user_id_map.items() diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 39f55022dda..7639eded2d5 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -24,7 +24,7 @@ from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, EventContentFields, RoomTypes from synapse.api.room_versions import RoomVersions from synapse.events import EventBase from synapse.federation.federation_base import event_from_pdu_json @@ -499,7 +499,46 @@ class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): room.register_servlets, ] - def test_TODO(self) -> None: + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.store = self.hs.get_datastores().main + + def test_room_with_no_info(self) -> None: + """ + Test room that doesn't have a room type, encryption, or name. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + sliding_sync_joined_rooms_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_joined_rooms %s", + sliding_sync_joined_rooms_results, + ) + + sliding_sync_non_join_memberships_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_non_join_memberships", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_non_join_memberships %s", + sliding_sync_non_join_memberships_results, + ) + + def test_room_with_info(self) -> None: """ TODO """ @@ -509,10 +548,91 @@ def test_TODO(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id1, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) + sliding_sync_joined_rooms_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_joined_rooms %s", + sliding_sync_joined_rooms_results, + ) + + sliding_sync_non_join_memberships_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_non_join_memberships", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_non_join_memberships %s", + sliding_sync_non_join_memberships_results, + ) + + def test_space_room_with_info(self) -> None: + """ + TODO + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(space_room_id, user1_id, tok=user1_tok) + + sliding_sync_joined_rooms_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_joined_rooms %s", + sliding_sync_joined_rooms_results, + ) + + sliding_sync_non_join_memberships_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_non_join_memberships", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_non_join_memberships %s", + sliding_sync_non_join_memberships_results, + ) + def test_server_left_room(self) -> None: """ TODO @@ -533,4 +653,22 @@ def test_server_left_room(self) -> None: # User1 leaves the room self.helper.leave(room_id1, user1_id, tok=user1_tok) - # TODO: Server left room test + sliding_sync_joined_rooms_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", None, retcols=("*",) + ) + ) 
+ logger.info( + "sliding_sync_joined_rooms %s", + sliding_sync_joined_rooms_results, + ) + + sliding_sync_non_join_memberships_results = self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_non_join_memberships", None, retcols=("*",) + ) + ) + logger.info( + "sliding_sync_non_join_memberships %s", + sliding_sync_non_join_memberships_results, + ) From 68a3daf605878e87a075400b390357d0bfbcc74a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 18:10:24 -0500 Subject: [PATCH 011/142] Fix comparison and insert --- synapse/storage/databases/main/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 913b6f2b876..93439e72a02 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1511,9 +1511,9 @@ def _update_current_state_txn( membership_event_id, membership_event_id, membership_event_id, - Membership.JOIN, ] + list(insert_values) + + [Membership.JOIN] for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ], ) From 5b1053f23e80550b3e8ac52f1131c7000d6c9374 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 19:07:43 -0500 Subject: [PATCH 012/142] Better test assertions --- synapse/storage/databases/main/events.py | 11 +- .../delta/87/01_sliding_sync_memberships.sql | 10 +- tests/storage/test_events.py | 266 +++++++++++++----- tests/unittest.py | 4 +- 4 files changed, 212 insertions(+), 79 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 93439e72a02..c5976b3f8c7 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -45,8 +45,8 @@ from synapse.api.constants import ( EventContentFields, EventTypes, - RelationTypes, Membership, + RelationTypes, ) from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions @@ -1281,9 +1281,6 @@ def _update_current_state_txn( ) # Handle updating the `sliding_sync_joined_rooms` table - sliding_sync_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} event_ids_to_fetch: List[str] = [] create_event_id = None room_encryption_event_id = None @@ -1292,15 +1289,12 @@ def _update_current_state_txn( if state_key[0] == EventTypes.Create: create_event_id = event_id event_ids_to_fetch.append(event_id) - sliding_sync_joined_rooms_insert_map["room_type"] = None elif state_key[0] == EventTypes.RoomEncryption: room_encryption_event_id = event_id event_ids_to_fetch.append(event_id) - sliding_sync_joined_rooms_insert_map["is_encrypted"] = None elif state_key[0] == EventTypes.Name: room_name_event_id = event_id event_ids_to_fetch.append(event_id) - sliding_sync_joined_rooms_insert_map["room_name"] = None # Fetch the events from the database event_json_rows = cast( @@ -1315,6 +1309,9 @@ def _update_current_state_txn( ), ) # Parse the raw event JSON + sliding_sync_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} for event_id, json in event_json_rows: event_json = db_to_json(json) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 1afdecd8e9f..61ea65aba21 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -22,12 +22,12 @@ CREATE TABLE 
IF NOT EXISTS sliding_sync_joined_rooms( event_stream_ordering BIGINT REFERENCES events(stream_ordering), -- The `stream_ordering` of the last event according to the `bump_event_types` bump_stamp BIGINT, - -- `m.room.create` -> `content.type` + -- `m.room.create` -> `content.type` (current state) room_type TEXT, - -- `m.room.name` -> `content.name` + -- `m.room.name` -> `content.name` (current state) room_name TEXT, - -- `m.room.encryption` -> `content.algorithm` - is_encrypted BOOLEAN, + -- `m.room.encryption` -> `content.algorithm` (current state) + is_encrypted BOOLEAN DEFAULT 0 NOT NULL, PRIMARY KEY (room_id) ); @@ -49,7 +49,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( room_name TEXT, -- `m.room.encryption` -> `content.algorithm` (according to the current state at the -- time of the membership) - is_encrypted BOOLEAN, + is_encrypted BOOLEAN DEFAULT 0 NOT NULL, PRIMARY KEY (room_id, user_id) ); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 7639eded2d5..13214eb041e 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -20,11 +20,13 @@ # import logging -from typing import List, Optional +from typing import Dict, List, Optional, Tuple, cast + +import attr from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventTypes, Membership, EventContentFields, RoomTypes +from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes from synapse.api.room_versions import RoomVersions from synapse.events import EventBase from synapse.federation.federation_base import event_from_pdu_json @@ -486,6 +488,28 @@ def test_room_remote_user_cache_invalidated(self) -> None: self.assertEqual(users, []) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _SlidingSyncJoinedRoomResult: + room_id: str + event_stream_ordering: int + bump_stamp: int + room_type: str + room_name: str + is_encrypted: bool + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _SlidingSyncNonJoinMembershipResult: + room_id: str + user_id: str + membership_event_id: str + membership: str + event_stream_ordering: int + room_type: str + room_name: str + is_encrypted: bool + + class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): """ Tests to make sure the @@ -504,9 +528,89 @@ def prepare( ) -> None: self.store = self.hs.get_datastores().main - def test_room_with_no_info(self) -> None: + def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResult]: + """ + Return the rows from the `sliding_sync_joined_rooms` table. + + Returns: + Mapping from room_id to _SlidingSyncJoinedRoomResult. + """ + rows = cast( + List[Tuple[str, int, int, str, str, bool]], + self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", + None, + retcols=( + "room_id", + "event_stream_ordering", + "bump_stamp", + "room_type", + "room_name", + "is_encrypted", + ), + ), + ), + ) + + return { + row[0]: _SlidingSyncJoinedRoomResult( + room_id=row[0], + event_stream_ordering=row[1], + bump_stamp=row[2], + room_type=row[3], + room_name=row[4], + is_encrypted=row[5], + ) + for row in rows + } + + def _get_sliding_sync_non_join_memberships( + self, + ) -> Dict[Tuple[str, str], _SlidingSyncNonJoinMembershipResult]: + """ + Return the rows from the `sliding_sync_non_join_memberships` table. + + Returns: + Mapping from the (room_id, user_id) to _SlidingSyncNonJoinMembershipResult. 
+ """ + rows = cast( + List[Tuple[str, str, str, str, int, str, str, bool]], + self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_non_join_memberships", + None, + retcols=( + "room_id", + "user_id", + "membership_event_id", + "membership", + "event_stream_ordering", + "room_type", + "room_name", + "is_encrypted", + ), + ), + ), + ) + + return { + (row[0], row[1]): _SlidingSyncNonJoinMembershipResult( + room_id=row[0], + user_id=row[1], + membership_event_id=row[2], + membership=row[3], + event_stream_ordering=row[4], + room_type=row[5], + room_name=row[6], + is_encrypted=row[7], + ) + for row in rows + } + + def test_joined_room_with_no_info(self) -> None: """ - Test room that doesn't have a room type, encryption, or name. + Test joined room that doesn't have a room type, encryption, or name. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -518,27 +622,35 @@ def test_room_with_no_info(self) -> None: # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) - sliding_sync_joined_rooms_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_joined_rooms", None, retcols=("*",) - ) + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, ) - logger.info( - "sliding_sync_joined_rooms %s", - sliding_sync_joined_rooms_results, + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=None, + room_name=None, + is_encrypted=False, + ), ) - sliding_sync_non_join_memberships_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_non_join_memberships", None, retcols=("*",) - ) + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() ) - logger.info( - "sliding_sync_non_join_memberships %s", - sliding_sync_non_join_memberships_results, + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, ) - def test_room_with_info(self) -> None: + def test_joined_room_with_info(self) -> None: """ TODO """ @@ -566,27 +678,35 @@ def test_room_with_info(self) -> None: # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) - sliding_sync_joined_rooms_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_joined_rooms", None, retcols=("*",) - ) + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, ) - logger.info( - "sliding_sync_joined_rooms %s", - sliding_sync_joined_rooms_results, + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), ) - sliding_sync_non_join_memberships_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_non_join_memberships", None, retcols=("*",) - ) + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() ) - logger.info( - "sliding_sync_non_join_memberships %s", - sliding_sync_non_join_memberships_results, + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), 
+ set(), + exact=True, ) - def test_space_room_with_info(self) -> None: + def test_joined_space_room_with_info(self) -> None: """ TODO """ @@ -613,24 +733,32 @@ def test_space_room_with_info(self) -> None: # User1 joins the room self.helper.join(space_room_id, user1_id, tok=user1_tok) - sliding_sync_joined_rooms_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_joined_rooms", None, retcols=("*",) - ) + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {space_room_id}, + exact=True, ) - logger.info( - "sliding_sync_joined_rooms %s", - sliding_sync_joined_rooms_results, + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), ) - sliding_sync_non_join_memberships_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_non_join_memberships", None, retcols=("*",) - ) + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() ) - logger.info( - "sliding_sync_non_join_memberships %s", - sliding_sync_non_join_memberships_results, + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, ) def test_server_left_room(self) -> None: @@ -648,27 +776,35 @@ def test_server_left_room(self) -> None: self.helper.join(room_id1, user1_id, tok=user1_tok) # User2 leaves the room - self.helper.leave(room_id1, user2_id, tok=user2_tok) + leave_response2 = self.helper.leave(room_id1, user2_id, tok=user2_tok) # User1 leaves the room - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) - sliding_sync_joined_rooms_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_joined_rooms", None, retcols=("*",) - ) - ) - logger.info( - "sliding_sync_joined_rooms %s", - sliding_sync_joined_rooms_results, + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, ) - sliding_sync_non_join_memberships_results = self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_non_join_memberships", None, retcols=("*",) - ) + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() ) - logger.info( - "sliding_sync_non_join_memberships %s", - sliding_sync_non_join_memberships_results, + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + { + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=leave_response1["event_id"], + membership=Membership.LEAVE, + # TODO + event_stream_ordering=None, + room_type=None, + room_name=None, + is_encrypted=False, + ) + }, + exact=True, ) diff --git a/tests/unittest.py b/tests/unittest.py index 4aa7f561060..2532fa49fba 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -272,8 +272,8 @@ def assert_dict(self, required: Mapping, actual: Mapping) -> None: def assertIncludes( self, - actual_items: AbstractSet[str], - expected_items: AbstractSet[str], + actual_items: AbstractSet[TV], + expected_items: AbstractSet[TV], exact: bool = False, message: Optional[str] = None, ) -> None: From 
c590474757b4d9210d31ae4b2ec33968c7d7a1aa Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 19:24:58 -0500 Subject: [PATCH 013/142] Test non-joins --- tests/storage/test_events.py | 139 ++++++++++++++++++++++++++++++++--- 1 file changed, 128 insertions(+), 11 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 13214eb041e..25fd2622939 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -610,7 +610,8 @@ def _get_sliding_sync_non_join_memberships( def test_joined_room_with_no_info(self) -> None: """ - Test joined room that doesn't have a room type, encryption, or name. + Test joined room that doesn't have a room type, encryption, or name shows up in + `sliding_sync_joined_rooms`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -652,7 +653,7 @@ def test_joined_room_with_no_info(self) -> None: def test_joined_room_with_info(self) -> None: """ - TODO + Test joined encrypted room with name shows up in `sliding_sync_joined_rooms`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -708,7 +709,7 @@ def test_joined_room_with_info(self) -> None: def test_joined_space_room_with_info(self) -> None: """ - TODO + Test joined space room with name shows up in `sliding_sync_joined_rooms`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -761,9 +762,107 @@ def test_joined_space_room_with_info(self) -> None: exact=True, ) - def test_server_left_room(self) -> None: + # TODO: Test non-join, no info + + # TODO: Test info filled out for non-joins + + def test_non_join_invite_ban(self) -> None: """ - TODO + Test users who have invite/ban membership in room shows up in + `sliding_sync_non_join_memberships`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + user3_tok = self.login(user3_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + user1_invited_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # User3 joins the room + self.helper.join(room_id1, user3_id, tok=user3_tok) + # User3 is banned from the room + user3_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user3_id, tok=user2_tok + ) + user3_ban_event_pos = self.get_success( + self.store.get_position_for_event(user3_ban_response["event_id"]) + ) + + # User2 is still joined to the room so we should still have an entry + # in the `sliding_sync_joined_rooms` table. 
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + { + (room_id1, user1_id), + (room_id1, user3_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_non_join_memberships_results.get((room_id1, user1_id)), + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_non_join_memberships_results.get((room_id1, user3_id)), + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user3_id, + membership_event_id=user3_ban_response["event_id"], + membership=Membership.BAN, + event_stream_ordering=user3_ban_event_pos.stream, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + + # TODO: Test remote invite + + def test_non_join_server_left_room(self) -> None: + """ + Test everyone local leaves the room but their leave membership still shows up in + `sliding_sync_non_join_memberships`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -776,11 +875,19 @@ def test_server_left_room(self) -> None: self.helper.join(room_id1, user1_id, tok=user1_tok) # User2 leaves the room - leave_response2 = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_event_pos = self.get_success( + self.store.get_position_for_event(user2_leave_response["event_id"]) + ) # User1 leaves the room - leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_event_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + # No one is joined to the room anymore so we shouldn't have an entry in the + # `sliding_sync_joined_rooms` table. 
sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), @@ -788,6 +895,7 @@ def test_server_left_room(self) -> None: exact=True, ) + # We should still see rows for the leave events (non-joins) sliding_sync_non_join_memberships_results = ( self._get_sliding_sync_non_join_memberships() ) @@ -797,14 +905,23 @@ def test_server_left_room(self) -> None: _SlidingSyncNonJoinMembershipResult( room_id=room_id1, user_id=user1_id, - membership_event_id=leave_response1["event_id"], + membership_event_id=user1_leave_response["event_id"], membership=Membership.LEAVE, - # TODO - event_stream_ordering=None, + event_stream_ordering=user1_leave_event_pos.stream, room_type=None, room_name=None, is_encrypted=False, - ) + ), + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + room_type=None, + room_name=None, + is_encrypted=False, + ), }, exact=True, ) From a1aaa47dad27a93915bcd0cceaf7001df577418e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 19:58:51 -0500 Subject: [PATCH 014/142] Add more tests --- tests/storage/test_events.py | 188 ++++++++++++++++++++++++++++++++++- 1 file changed, 186 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 25fd2622939..50949b0b630 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -707,6 +707,100 @@ def test_joined_room_with_info(self) -> None: exact=True, ) + def test_joined_room_with_info_updated(self) -> None: + """ + Test info in `sliding_sync_joined_rooms` is updated when the current state is + updated. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, + ) + + # Update the room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room was renamed"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id1, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + # Make sure we see the new room name + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=None, + room_name="my super duper room was renamed", + is_encrypted=True, + ), + ) + + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, + ) + def test_joined_space_room_with_info(self) -> None: """ Test joined space room with name shows up in `sliding_sync_joined_rooms`. @@ -762,9 +856,99 @@ def test_joined_space_room_with_info(self) -> None: exact=True, ) - # TODO: Test non-join, no info + def test_non_join_space_room_with_info(self) -> None: + """ + Test users who was invited shows up in `sliding_sync_non_join_memberships`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + space_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + + # User1 is invited to the room + user1_invited_response = self.helper.invite( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # Update the room name after we are invited just to make sure + # we don't update non-join memberships when the room name changes. + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space was renamed"}, + tok=user2_tok, + ) + + # User2 is still joined to the room so we should still have an entry in the + # `sliding_sync_joined_rooms` table. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {space_room_id}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + # TODO + event_stream_ordering=None, + bump_stamp=None, + room_type=RoomTypes.SPACE, + room_name="my super duper space was renamed", + is_encrypted=True, + ), + ) - # TODO: Test info filled out for non-joins + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + { + (space_room_id, user1_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was invited + self.assertEqual( + sliding_sync_non_join_memberships_results.get((space_room_id, user1_id)), + _SlidingSyncNonJoinMembershipResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=True, + ), + ) def test_non_join_invite_ban(self) -> None: """ From bf78692ba05db1e8bf1f0711af3a5af70438908a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 20:09:53 -0500 Subject: [PATCH 015/142] Handle to_delete --- synapse/storage/databases/main/events.py | 31 +++++++++++++++++------- tests/storage/test_events.py | 4 +++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c5976b3f8c7..ac77492e181 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1286,16 +1286,30 @@ def _update_current_state_txn( room_encryption_event_id = None room_name_event_id = None for state_key, event_id in to_insert.items(): - if state_key[0] == EventTypes.Create: + if state_key[0] == EventTypes.Create and state_key[1] == "": create_event_id = event_id event_ids_to_fetch.append(event_id) - elif state_key[0] == 
EventTypes.RoomEncryption: + elif state_key[0] == EventTypes.RoomEncryption and state_key[1] == "": room_encryption_event_id = event_id event_ids_to_fetch.append(event_id) - elif state_key[0] == EventTypes.Name: + elif state_key[0] == EventTypes.Name and state_key[1] == "": room_name_event_id = event_id event_ids_to_fetch.append(event_id) + # Map of values to insert/update in the `sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + + # If something is being deleted from the state, we need to clear it out + for event_type, state_key in to_delete: + if event_type == EventTypes.Create and state_key == "": + sliding_sync_joined_rooms_insert_map["room_type"] = None + elif event_type == EventTypes.RoomEncryption and state_key == "": + sliding_sync_joined_rooms_insert_map["is_encrypted"] = False + elif event_type == EventTypes.Name and state_key == "": + sliding_sync_joined_rooms_insert_map["room_name"] = None + # Fetch the events from the database event_json_rows = cast( List[Tuple[str, str]], @@ -1309,9 +1323,6 @@ def _update_current_state_txn( ), ) # Parse the raw event JSON - sliding_sync_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} for event_id, json in event_json_rows: event_json = db_to_json(json) @@ -1413,6 +1424,11 @@ def _update_current_state_txn( membership_event_id_to_user_id_map[event_id] = state_key[1] if len(membership_event_id_to_user_id_map) > 0: + # Map of values to insert/update in the `sliding_sync_non_join_memberships` table + sliding_sync_non_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + # Fetch the events from the database # # TODO: We should gather this data before we delete the @@ -1442,9 +1458,6 @@ def _update_current_state_txn( ) # Parse the raw event JSON - sliding_sync_non_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} for row in txn: event_id, event_type, state_key, json = row event_json = db_to_json(json) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 50949b0b630..ea388458b68 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -856,6 +856,8 @@ def test_joined_space_room_with_info(self) -> None: exact=True, ) + # TODO: test_joined_room_state_reset + def test_non_join_space_room_with_info(self) -> None: """ Test users who was invited shows up in `sliding_sync_non_join_memberships`. 
@@ -1109,3 +1111,5 @@ def test_non_join_server_left_room(self) -> None: }, exact=True, ) + + # TODO: test_non_join_state_reset From 5cf3ad3d7fde5a2a8b8f949ef5a82b68c30878dc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 20:47:13 -0500 Subject: [PATCH 016/142] Handle server left room --- synapse/storage/databases/main/events.py | 295 +++++++++++++---------- tests/storage/test_events.py | 52 ++-- 2 files changed, 199 insertions(+), 148 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ac77492e181..843dc227528 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -66,7 +66,13 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator -from synapse.types import JsonDict, StateMap, StrCollection, get_domain_from_id +from synapse.types import ( + JsonDict, + MutableStateMap, + StateMap, + StrCollection, + get_domain_from_id, +) from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.stringutils import non_null_str_or_none @@ -1178,6 +1184,168 @@ def _update_current_state_txn( if ev_type == EventTypes.Member } + # We now update `sliding_sync_non_join_memberships`. + # + # This would only happen if someone was state reset out of the room + if to_delete: + txn.execute_batch( + "DELETE FROM sliding_sync_non_join_memberships" + " WHERE room_id = ? AND user_id = ?", + ( + (room_id, state_key) + for event_type, state_key in to_delete + if event_type == EventTypes.Member and self.is_mine_id(state_key) + ), + ) + + # We handle `sliding_sync_non_join_memberships` before `current_state_events` so + # we can gather the current state before it might be deleted if we are + # `no_longer_in_room`. + # + # We do this regardless of whether the server is `no_longer_in_room` or not + # because we still want a row if a local user was just left/kicked or got banned + # from the room. + if to_insert: + membership_event_id_to_user_id_map: Dict[str, str] = {} + for state_key, event_id in to_insert.items(): + if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): + membership_event_id_to_user_id_map[event_id] = state_key[1] + + if len(membership_event_id_to_user_id_map) > 0: + # Map of values to insert/update in the `sliding_sync_non_join_memberships` table + sliding_sync_non_joined_rooms_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + + relevant_state_set = { + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + (EventTypes.Name, ""), + } + + # Fetch the current state event IDs from the database + ( + event_type_and_state_key_in_list_clause, + event_type_and_state_key_args, + ) = make_tuple_in_list_sql_clause( + self.database_engine, + ("type", "state_key"), + relevant_state_set, + ) + txn.execute( + f""" + SELECT c.event_id, c.type, c.state_key + FROM current_state_events AS c + WHERE + c.room_id = ? + AND {event_type_and_state_key_in_list_clause} + """, + [room_id] + event_type_and_state_key_args, + ) + current_state_map: MutableStateMap[str] = { + (event_type, state_key): event_id + for event_id, event_type, state_key in txn + } + # Since we fetched the current state before we took `to_insert`/`to_delete` + # into account, we need to do a couple fixups. 
+ # + # Update the current_state_map with what we have `to_delete` + for state_key in to_delete: + current_state_map.pop(state_key, None) + # Update the current_state_map with what we have `to_insert` + for state_key, event_id in to_insert.items(): + if state_key in relevant_state_set: + current_state_map[state_key] = event_id + + # Fetch the raw event JSON from the database + ( + event_id_in_list_clause, + event_id_args, + ) = make_in_list_sql_clause( + self.database_engine, + "event_id", + current_state_map.values(), + ) + txn.execute( + f""" + SELECT event_id, type, state_key, json FROM event_json + INNER JOIN events USING (event_id) + WHERE {event_id_in_list_clause} + """, + event_id_args, + ) + + # Parse the raw event JSON + for row in txn: + event_id, event_type, state_key, json = row + event_json = db_to_json(json) + + if event_type == EventTypes.Create: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_non_joined_rooms_insert_map["room_type"] = ( + room_type + ) + elif event_type == EventTypes.RoomEncryption: + encryption_algorithm = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is_encrypted = encryption_algorithm is not None + sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( + is_encrypted + ) + elif event_type == EventTypes.Name: + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) + sliding_sync_non_joined_rooms_insert_map["room_name"] = ( + room_name + ) + else: + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + ) + + # Update the `sliding_sync_non_join_memberships` table + insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() + insert_values = sliding_sync_non_joined_rooms_insert_map.values() + # TODO: Only do this for non-join membership + txn.execute_batch( + f""" + WITH data_table (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) AS ( + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?), + {", ".join("?" for _ in insert_values)} + ) + ) + INSERT INTO sliding_sync_non_join_memberships + (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) + SELECT * FROM data_table + WHERE membership != ? + ON CONFLICT (room_id, user_id) + DO UPDATE SET + membership_event_id = EXCLUDED.membership_event_id, + membership = EXCLUDED.membership, + event_stream_ordering = EXCLUDED.event_stream_ordering, + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """, + [ + [ + room_id, + user_id, + membership_event_id, + membership_event_id, + membership_event_id, + ] + + list(insert_values) + + [Membership.JOIN] + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + ], + ) + if delta_state.no_longer_in_room: # Server is no longer in the room so we delete the room from # current_state_events, being careful we've already updated the @@ -1403,131 +1571,6 @@ def _update_current_state_txn( ], ) - # We now update `sliding_sync_non_join_memberships`. We do this regardless of - # whether the server is still in the room or not because we still want a row if - # a local user was just left/kicked or got banned from the room. - if to_delete: - txn.execute_batch( - "DELETE FROM sliding_sync_non_join_memberships" - " WHERE room_id = ? 
AND user_id = ?", - ( - (room_id, state_key) - for event_type, state_key in to_delete - if event_type == EventTypes.Member and self.is_mine_id(state_key) - ), - ) - - if to_insert: - membership_event_id_to_user_id_map: Dict[str, str] = {} - for state_key, event_id in to_insert.items(): - if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): - membership_event_id_to_user_id_map[event_id] = state_key[1] - - if len(membership_event_id_to_user_id_map) > 0: - # Map of values to insert/update in the `sliding_sync_non_join_memberships` table - sliding_sync_non_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} - - # Fetch the events from the database - # - # TODO: We should gather this data before we delete the - # `current_state_events` in a `no_longer_in_room` situation. - ( - event_type_and_state_key_in_list_clause, - event_type_and_state_key_args, - ) = make_tuple_in_list_sql_clause( - self.database_engine, - ("type", "state_key"), - [ - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - (EventTypes.Name, ""), - ], - ) - txn.execute( - f""" - SELECT c.event_id, c.type, c.state_key, j.json - FROM current_state_events AS c - INNER JOIN event_json AS j USING (event_id) - WHERE - c.room_id = ? - AND {event_type_and_state_key_in_list_clause} - """, - [room_id] + event_type_and_state_key_args, - ) - - # Parse the raw event JSON - for row in txn: - event_id, event_type, state_key, json = row - event_json = db_to_json(json) - - if event_type == EventTypes.Create: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_non_joined_rooms_insert_map["room_type"] = ( - room_type - ) - elif event_type == EventTypes.RoomEncryption: - encryption_algorithm = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( - is_encrypted - ) - elif event_type == EventTypes.Name: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_non_joined_rooms_insert_map["room_name"] = ( - room_name - ) - else: - raise AssertionError( - f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" - ) - - # Update the `sliding_sync_non_join_memberships` table - insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() - insert_values = sliding_sync_non_joined_rooms_insert_map.values() - # TODO: Only do this for non-join membership - txn.execute_batch( - f""" - WITH data_table (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) AS ( - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in insert_values)} - ) - ) - INSERT INTO sliding_sync_non_join_memberships - (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) - SELECT * FROM data_table - WHERE membership != ? 
- ON CONFLICT (room_id, user_id) - DO UPDATE SET - membership_event_id = EXCLUDED.membership_event_id, - membership = EXCLUDED.membership, - event_stream_ordering = EXCLUDED.event_stream_ordering, - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} - """, - [ - [ - room_id, - user_id, - membership_event_id, - membership_event_id, - membership_event_id, - ] - + list(insert_values) - + [Membership.JOIN] - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ], - ) - txn.call_after( self.store._curr_state_delta_stream_cache.entity_has_changed, room_id, diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index ea388458b68..2fb863a8c92 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -863,7 +863,7 @@ def test_non_join_space_room_with_info(self) -> None: Test users who was invited shows up in `sliding_sync_non_join_memberships`. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") user2_id = self.register_user("user2", "pass") user2_tok = self.login(user2_id, "pass") @@ -958,7 +958,7 @@ def test_non_join_invite_ban(self) -> None: `sliding_sync_non_join_memberships`. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") user2_id = self.register_user("user2", "pass") user2_tok = self.login(user2_id, "pass") user3_id = self.register_user("user3", "pass") @@ -1088,28 +1088,36 @@ def test_non_join_server_left_room(self) -> None: self.assertIncludes( set(sliding_sync_non_join_memberships_results.keys()), { - _SlidingSyncNonJoinMembershipResult( - room_id=room_id1, - user_id=user1_id, - membership_event_id=user1_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user1_leave_event_pos.stream, - room_type=None, - room_name=None, - is_encrypted=False, - ), - _SlidingSyncNonJoinMembershipResult( - room_id=room_id1, - user_id=user2_id, - membership_event_id=user2_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user2_leave_event_pos.stream, - room_type=None, - room_name=None, - is_encrypted=False, - ), + (room_id1, user1_id), + (room_id1, user2_id), }, exact=True, ) + self.assertEqual( + sliding_sync_non_join_memberships_results.get((room_id1, user1_id)), + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_event_pos.stream, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_non_join_memberships_results.get((room_id1, user2_id)), + _SlidingSyncNonJoinMembershipResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) # TODO: test_non_join_state_reset From bc3796d333893c7fc1a95a7ad04f2624b5718156 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Aug 2024 20:49:46 -0500 Subject: [PATCH 017/142] Fix some lints --- synapse/storage/databases/main/events.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 843dc227528..c101820c1d9 100644 --- 
a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1310,7 +1310,6 @@ def _update_current_state_txn( # Update the `sliding_sync_non_join_memberships` table insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() insert_values = sliding_sync_non_joined_rooms_insert_map.values() - # TODO: Only do this for non-join membership txn.execute_batch( f""" WITH data_table (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) AS ( @@ -1470,12 +1469,12 @@ def _update_current_state_txn( ] = {} # If something is being deleted from the state, we need to clear it out - for event_type, state_key in to_delete: - if event_type == EventTypes.Create and state_key == "": + for state_key in to_delete: + if state_key == (EventTypes.Create, ""): sliding_sync_joined_rooms_insert_map["room_type"] = None - elif event_type == EventTypes.RoomEncryption and state_key == "": + elif state_key == (EventTypes.RoomEncryption, ""): sliding_sync_joined_rooms_insert_map["is_encrypted"] = False - elif event_type == EventTypes.Name and state_key == "": + elif state_key == (EventTypes.Name, ""): sliding_sync_joined_rooms_insert_map["room_name"] = None # Fetch the events from the database From cc2d2b6b9f8fd286725e010b30f1c3eae3ccaadf Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Aug 2024 15:41:55 -0500 Subject: [PATCH 018/142] Fill in `stream_ordering`/`bump_stamp` when we add current state to the joined rooms table --- synapse/handlers/sliding_sync.py | 23 ++++------ synapse/storage/databases/main/events.py | 53 ++++++++++++++++++++++-- synapse/types/handlers/__init__.py | 13 ++++++ tests/storage/test_events.py | 11 ++--- 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 84677665182..4253eebed26 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -74,7 +74,12 @@ StreamToken, UserID, ) -from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult +from synapse.types.handlers import ( + SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + OperationType, + SlidingSyncConfig, + SlidingSyncResult, +) from synapse.types.state import StateFilter from synapse.util.async_helpers import concurrently_execute from synapse.visibility import filter_events_for_client @@ -91,18 +96,6 @@ class Sentinel(enum.Enum): UNSET_SENTINEL = object() -# The event types that clients should consider as new activity. 
-DEFAULT_BUMP_EVENT_TYPES = { - EventTypes.Create, - EventTypes.Message, - EventTypes.Encrypted, - EventTypes.Sticker, - EventTypes.CallInvite, - EventTypes.PollStart, - EventTypes.LiveLocationShareStart, -} - - @attr.s(slots=True, frozen=True, auto_attribs=True) class _RoomMembershipForUser: """ @@ -2174,7 +2167,9 @@ async def get_room_sync_data( # Figure out the last bump event in the room last_bump_event_result = ( await self.store.get_last_event_pos_in_room_before_stream_ordering( - room_id, to_token.room_key, event_types=DEFAULT_BUMP_EVENT_TYPES + room_id, + to_token.room_key, + event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, ) ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c101820c1d9..1dd114c6013 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -73,6 +73,7 @@ StrCollection, get_domain_from_id, ) +from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.stringutils import non_null_str_or_none @@ -1308,6 +1309,9 @@ def _update_current_state_txn( ) # Update the `sliding_sync_non_join_memberships` table + # + # Pulling keys/values separately is safe and will produce congruent + # lists insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() insert_values = sliding_sync_non_joined_rooms_insert_map.values() txn.execute_batch( @@ -1452,6 +1456,7 @@ def _update_current_state_txn( create_event_id = None room_encryption_event_id = None room_name_event_id = None + bump_event_id = None for state_key, event_id in to_insert.items(): if state_key[0] == EventTypes.Create and state_key[1] == "": create_event_id = event_id @@ -1463,6 +1468,12 @@ def _update_current_state_txn( room_name_event_id = event_id event_ids_to_fetch.append(event_id) + if ( + state_key[0] in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES + and state_key[1] == "" + ): + bump_event_id = event_id + # Map of values to insert/update in the `sliding_sync_joined_rooms` table sliding_sync_joined_rooms_insert_map: Dict[ str, Optional[Union[str, bool]] @@ -1515,23 +1526,57 @@ def _update_current_state_txn( ) # Update the `sliding_sync_joined_rooms` table + args: List[Any] = [ + room_id, + # Even though `Mapping`/`Dict` have no guaranteed order, some + # implementations may preserve insertion order so we're just going to + # choose the best possible answer by using the "last" event ID which we + # will assume will have the greatest `stream_ordering`. We really just + # need *some* answer in case we are the first ones inserting into the + # table and this will resolve itself when we update this field in the + # persist events loop. + list(to_insert.values())[-1], + ] + # If we have a `bump_event_id`, let's update the `bump_stamp` column + bump_stamp_column = "" + bump_stamp_values_clause = "" + if bump_event_id is not None: + bump_stamp_column = "bump_stamp, " + bump_stamp_values_clause = ( + "(SELECT stream_ordering FROM events WHERE event_id = ?)," + ) + args.append(bump_event_id) + # Pulling keys/values separately is safe and will produce congruent lists insert_keys = sliding_sync_joined_rooms_insert_map.keys() insert_values = sliding_sync_joined_rooms_insert_map.values() + args.extend(iter(insert_values)) if len(insert_keys) > 0: - # TODO: Should we add `event_stream_ordering`, `bump_stamp` on insert? 
+ # We don't update `bump_stamp` `ON CONFLICT` because we're dealing with + # state here and the only state event that is also a bump event type is + # `m.room.create`. Given the room creation event is the first one in the + # room, it's either going to be set on insert, or we've already moved on + # to other events with a greater `stream_ordering`/`bump_stamp` and we + # don't need to even try. txn.execute( f""" INSERT INTO sliding_sync_joined_rooms - (room_id, {", ".join(insert_keys)}) + (room_id, event_stream_ordering, {bump_stamp_column} {", ".join(insert_keys)}) VALUES ( ?, + (SELECT stream_ordering FROM events WHERE event_id = ?), + {bump_stamp_values_clause} {", ".join("?" for _ in insert_values)} ) ON CONFLICT (room_id) DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)}, + event_stream_ordering = CASE + WHEN event_stream_ordering < EXCLUDED.event_stream_ordering + THEN EXCLUDED.event_stream_ordering + ELSE event_stream_ordering + END """, - [room_id] + list(insert_values), + args, ) # We now update `local_current_membership`. We do this regardless diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 363f060bef9..62c86919c44 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -30,6 +30,7 @@ else: from pydantic import Extra +from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.types import ( DeviceListUpdates, @@ -45,6 +46,18 @@ if TYPE_CHECKING: from synapse.handlers.relations import BundledAggregations +# Sliding Sync: The event types that clients should consider as new activity and affect +# the `bump_stamp` +SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES = { + EventTypes.Create, + EventTypes.Message, + EventTypes.Encrypted, + EventTypes.Sticker, + EventTypes.CallInvite, + EventTypes.PollStart, + EventTypes.LiveLocationShareStart, +} + class ShutdownRoomParams(TypedDict): """ diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 2fb863a8c92..95431234736 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -523,10 +523,9 @@ class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): room.register_servlets, ] - def prepare( - self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer - ) -> None: - self.store = self.hs.get_datastores().main + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.storage_controllers = hs.get_storage_controllers() def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResult]: """ @@ -615,10 +614,8 @@ def test_joined_room_with_no_info(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) From ca909013c8c8ed99c92f055b2da56c68b49c4df0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Aug 2024 17:49:15 -0500 Subject: [PATCH 019/142] Fill in `stream_ordering`/`bump_stamp` for any event being persisted --- synapse/storage/databases/main/events.py | 67 ++++- .../delta/87/01_sliding_sync_memberships.sql | 13 +- tests/storage/test_events.py | 238 
++++++++++++++---- 3 files changed, 266 insertions(+), 52 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1dd114c6013..2b91f1f67cf 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1530,12 +1530,13 @@ def _update_current_state_txn( room_id, # Even though `Mapping`/`Dict` have no guaranteed order, some # implementations may preserve insertion order so we're just going to - # choose the best possible answer by using the "last" event ID which we + # choose the best possible answer by using the "first" event ID which we # will assume will have the greatest `stream_ordering`. We really just # need *some* answer in case we are the first ones inserting into the - # table and this will resolve itself when we update this field in the - # persist events loop. - list(to_insert.values())[-1], + # table and in reality, `_store_event_txn()` is run before this function + # so it will already have the correct value. This is just to account for + # things changing in the future. + next(iter(to_insert.values())), ] # If we have a `bump_event_id`, let's update the `bump_stamp` column bump_stamp_column = "" @@ -1977,6 +1978,64 @@ def event_dict(event: EventBase) -> JsonDict: ], ) + # Handle updating `sliding_sync_joined_rooms` + room_id_to_stream_ordering_map: Dict[str, int] = {} + room_id_to_bump_stamp_map: Dict[str, int] = {} + for event, _ in events_and_contexts: + existing_stream_ordering = room_id_to_stream_ordering_map.get(event.room_id) + # This should exist at this point because we're inserting events here which require it + assert event.internal_metadata.stream_ordering is not None + if ( + existing_stream_ordering is None + or existing_stream_ordering < event.internal_metadata.stream_ordering + ): + room_id_to_stream_ordering_map[event.room_id] = ( + event.internal_metadata.stream_ordering + ) + + if event.type in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES: + existing_bump_stamp = room_id_to_bump_stamp_map.get(event.room_id) + # This should exist at this point because we're inserting events here which require it + assert event.internal_metadata.stream_ordering is not None + if ( + existing_bump_stamp is None + or existing_bump_stamp < event.internal_metadata.stream_ordering + ): + room_id_to_bump_stamp_map[event.room_id] = ( + event.internal_metadata.stream_ordering + ) + + # `_store_event_txn` is run before `_update_current_state_txn` which handles + # deleting the rows if we are no longer in the room so we don't need to worry + # about inserting something that will be orphaned. + self.db_pool.simple_upsert_many_txn( + txn, + table="sliding_sync_joined_rooms", + key_names=("room_id",), + key_values=[ + (room_id,) for room_id in room_id_to_stream_ordering_map.keys() + ], + value_names=("event_stream_ordering",), + value_values=[ + (room_id_to_stream_ordering_map[room_id],) + for room_id in room_id_to_stream_ordering_map.keys() + ], + ) + # This has to be separate from the upsert above because we won't have a + # `bump_stamp` for every event and we don't want to overwrite the existing value + # with `None`. 
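The distinction the comments above draw between `event_stream_ordering` and `bump_stamp` is easier to see with a toy example. The sketch below is illustrative only and is not part of the patch; the sort order shown is an assumption about how a room list might consume the column. `bump_stamp` only advances for the activity event types in `SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES`, so an event that moves `event_stream_ordering` forward (for example a state update) need not reorder an activity-sorted room list.

rooms = [
    # !a has the newer stream position (e.g. from a state update) but its last
    # activity/bump event is older than !b's last message.
    {"room_id": "!a:test", "event_stream_ordering": 40, "bump_stamp": 12},
    {"room_id": "!b:test", "event_stream_ordering": 35, "bump_stamp": 30},
]
# Sorting by bump_stamp keeps !b first even though !a has the newer stream position.
rooms.sort(key=lambda r: r["bump_stamp"], reverse=True)
assert [r["room_id"] for r in rooms] == ["!b:test", "!a:test"]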
+ self.db_pool.simple_upsert_many_txn( + txn, + table="sliding_sync_joined_rooms", + key_names=("room_id",), + key_values=[(room_id,) for room_id in room_id_to_bump_stamp_map.keys()], + value_names=("bump_stamp",), + value_values=[ + (room_id_to_bump_stamp_map[room_id],) + for room_id in room_id_to_bump_stamp_map.keys() + ], + ) + def _store_rejected_events_txn( self, txn: LoggingTransaction, diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 61ea65aba21..a60fabb5862 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -31,7 +31,16 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( PRIMARY KEY (room_id) ); - +-- Store the user's non-join room memberships. Only stores the latest membership event +-- for a given user which matches `local_current_membership` (except we don't store +-- joins). +-- +-- FIXME: It might be easier to just store any membership here but just indicate that +-- the state is a snapshot of the current state at the time of the membership event. +-- That way we don't have to worry about clearing out +-- `sliding_sync_non_join_memberships` when the the user joins a room. And we always +-- have the full picture. Perhaps we can call it `sliding_sync_membership_snapshots`. +-- -- We don't include `bump_stamp` here because we can just use the `stream_ordering` from -- the membership event itself as the `bump_stamp`. CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( @@ -50,6 +59,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( -- `m.room.encryption` -> `content.algorithm` (according to the current state at the -- time of the membership) is_encrypted BOOLEAN DEFAULT 0 NOT NULL, + -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms` + -- (tracked by https://github.com/element-hq/synapse/issues/17540) PRIMARY KEY (room_id, user_id) ); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 95431234736..b8302a79282 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -617,8 +617,9 @@ def test_joined_room_with_no_info(self) -> None: room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) - # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( @@ -630,15 +631,20 @@ def test_joined_room_with_no_info(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - # TODO - event_stream_ordering=None, - bump_stamp=None, + # History visibility just happens to be the last event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomHistoryVisibility, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=None, room_name=None, is_encrypted=False, ), ) + # No one is non-joined to this room so we shouldn't see anything sliding_sync_non_join_memberships_results = ( self._get_sliding_sync_non_join_memberships() ) @@ -674,7 +680,14 @@ def test_joined_room_with_info(self) -> None: ) # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_response = 
self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( @@ -686,9 +699,10 @@ def test_joined_room_with_info(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - # TODO - event_stream_ordering=None, - bump_stamp=None, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=None, room_name="my super duper room", is_encrypted=True, @@ -704,10 +718,73 @@ def test_joined_room_with_info(self) -> None: exact=True, ) - def test_joined_room_with_info_updated(self) -> None: + def test_joined_space_room_with_info(self) -> None: """ - Test info in `sliding_sync_joined_rooms` is updated when the current state is - updated. + Test joined space room with name shows up in `sliding_sync_joined_rooms`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # User1 joins the room + user1_join_response = self.helper.join(space_room_id, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {space_room_id}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) + + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, + ) + + def test_joined_room_with_state_updated(self) -> None: + """ + Test state derived info in `sliding_sync_joined_rooms` is updated when the + current state is updated. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -724,7 +801,14 @@ def test_joined_room_with_info_updated(self) -> None: ) # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( @@ -736,9 +820,10 @@ def test_joined_room_with_info_updated(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - # TODO - event_stream_ordering=None, - bump_stamp=None, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=None, room_name="my super duper room", is_encrypted=False, @@ -762,12 +847,15 @@ def test_joined_room_with_info_updated(self) -> None: tok=user2_tok, ) # Encrypt the room - self.helper.send_state( + encrypt_room_response = self.helper.send_state( room_id1, EventTypes.RoomEncryption, {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user2_tok, ) + encrypt_room_event_pos = self.get_success( + self.store.get_position_for_event(encrypt_room_response["event_id"]) + ) sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( @@ -780,9 +868,10 @@ def test_joined_room_with_info_updated(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - # TODO - event_stream_ordering=None, - bump_stamp=None, + event_stream_ordering=encrypt_room_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=None, room_name="my super duper room was renamed", is_encrypted=True, @@ -798,48 +887,88 @@ def test_joined_room_with_info_updated(self) -> None: exact=True, ) - def test_joined_space_room_with_info(self) -> None: + def test_joined_room_is_bumped(self) -> None: """ - Test joined space room with name shows up in `sliding_sync_joined_rooms`. + Test that `event_stream_ordering` and `bump_stamp` is updated when a new bump + event is sent (`sliding_sync_joined_rooms`). 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") user2_id = self.register_user("user2", "pass") user2_tok = self.login(user2_id, "pass") - space_room_id = self.helper.create_room_as( - user2_id, - tok=user2_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) # Add a room name self.helper.send_state( - space_room_id, + room_id1, EventTypes.Name, - {"name": "my super duper space"}, + {"name": "my super duper room"}, tok=user2_tok, ) # User1 joins the room - self.helper.join(space_room_id, user1_id, tok=user1_tok) + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), - {space_room_id}, + {room_id1}, exact=True, ) self.assertEqual( - sliding_sync_joined_rooms_results[space_room_id], + sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( - room_id=space_room_id, - # TODO - event_stream_ordering=None, - bump_stamp=None, - room_type=RoomTypes.SPACE, - room_name="my super duper space", + room_id=room_id1, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + sliding_sync_non_join_memberships_results = ( + self._get_sliding_sync_non_join_memberships() + ) + self.assertIncludes( + set(sliding_sync_non_join_memberships_results.keys()), + set(), + exact=True, + ) + + # Send a new message to bump the room + event_response = self.helper.send(room_id1, "some message", tok=user1_tok) + event_pos = self.get_success( + self.store.get_position_for_event(event_response["event_id"]) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + # Make sure we see the new room name + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Updated `event_stream_ordering` + event_stream_ordering=event_pos.stream, + # And since the event was a bump event, the `bump_stamp` should be updated + bump_stamp=event_pos.stream, + # The state is still the same (it didn't change) + room_type=None, + room_name="my super duper room", is_encrypted=False, ), ) @@ -896,12 +1025,19 @@ def test_non_join_space_room_with_info(self) -> None: # Update the room name after we are invited just to make sure # we don't update non-join memberships when the room name changes. - self.helper.send_state( + rename_response = self.helper.send_state( space_room_id, EventTypes.Name, {"name": "my super duper space was renamed"}, tok=user2_tok, ) + rename_event_pos = self.get_success( + self.store.get_position_for_event(rename_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) # User2 is still joined to the room so we should still have an entry in the # `sliding_sync_joined_rooms` table. 
@@ -915,9 +1051,10 @@ def test_non_join_space_room_with_info(self) -> None: sliding_sync_joined_rooms_results[space_room_id], _SlidingSyncJoinedRoomResult( room_id=space_room_id, - # TODO - event_stream_ordering=None, - bump_stamp=None, + event_stream_ordering=rename_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=RoomTypes.SPACE, room_name="my super duper space was renamed", is_encrypted=True, @@ -981,6 +1118,10 @@ def test_non_join_invite_ban(self) -> None: self.store.get_position_for_event(user3_ban_response["event_id"]) ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + # User2 is still joined to the room so we should still have an entry # in the `sliding_sync_joined_rooms` table. sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() @@ -993,9 +1134,10 @@ def test_non_join_invite_ban(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - # TODO - event_stream_ordering=None, - bump_stamp=None, + event_stream_ordering=user3_ban_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, room_type=None, room_name=None, is_encrypted=False, @@ -1042,6 +1184,8 @@ def test_non_join_invite_ban(self) -> None: # TODO: Test remote invite + # TODO Test for non-join membership changing + def test_non_join_server_left_room(self) -> None: """ Test everyone local leaves the room but their leave membership still shows up in From 3367422fd3b7ac352b65b9c5da4601ec055cb13e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Aug 2024 18:23:50 -0500 Subject: [PATCH 020/142] Need to fix upsert --- synapse/storage/databases/main/events.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2b91f1f67cf..276188e21b6 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2008,6 +2008,9 @@ def event_dict(event: EventBase) -> JsonDict: # `_store_event_txn` is run before `_update_current_state_txn` which handles # deleting the rows if we are no longer in the room so we don't need to worry # about inserting something that will be orphaned. + # + # FIXME: We need to handle cases where we are persisting events out of order and + # the stream_ordering didn't increase. 
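The FIXME above is the crux of the next patch: events can be persisted out of order, so a write carrying an older `stream_ordering` must never drag the per-room pointers backwards. Distilled to a minimal, runnable sketch against a hypothetical `example_positions(room_id, pos)` table (the table and column names are assumptions for illustration, not from the patch), the shape of the fix is a conditional upsert that only ever moves forward; the `CASE` expression is a portable way to take the greater of the two values on both SQLite and PostgreSQL.

import sqlite3

# Hypothetical table for illustration only.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE example_positions (room_id TEXT PRIMARY KEY, pos BIGINT)")
upsert = """
    INSERT INTO example_positions (room_id, pos) VALUES (?, ?)
    ON CONFLICT (room_id) DO UPDATE SET
        pos = CASE
            WHEN pos IS NULL OR pos < EXCLUDED.pos THEN EXCLUDED.pos
            ELSE pos
        END
"""
conn.execute(upsert, ("!room:test", 10))
# An out-of-order write with an older position must not move the pointer backwards.
conn.execute(upsert, ("!room:test", 5))
assert conn.execute("SELECT pos FROM example_positions").fetchone()[0] == 10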
self.db_pool.simple_upsert_many_txn( txn, table="sliding_sync_joined_rooms", From ed47a7eff5a26e94a06f164366ee151d7cb561f0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 11:27:17 -0500 Subject: [PATCH 021/142] Fix bumping when events are persisted out of order --- synapse/storage/databases/main/events.py | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 276188e21b6..267b66272a1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1572,7 +1572,7 @@ def _update_current_state_txn( DO UPDATE SET {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)}, event_stream_ordering = CASE - WHEN event_stream_ordering < EXCLUDED.event_stream_ordering + WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering THEN EXCLUDED.event_stream_ordering ELSE event_stream_ordering END @@ -2005,39 +2005,39 @@ def event_dict(event: EventBase) -> JsonDict: event.internal_metadata.stream_ordering ) - # `_store_event_txn` is run before `_update_current_state_txn` which handles - # deleting the rows if we are no longer in the room so we don't need to worry - # about inserting something that will be orphaned. - # - # FIXME: We need to handle cases where we are persisting events out of order and - # the stream_ordering didn't increase. - self.db_pool.simple_upsert_many_txn( - txn, - table="sliding_sync_joined_rooms", - key_names=("room_id",), - key_values=[ - (room_id,) for room_id in room_id_to_stream_ordering_map.keys() - ], - value_names=("event_stream_ordering",), - value_values=[ - (room_id_to_stream_ordering_map[room_id],) + # This function (`_store_event_txn(...)`) is run before + # `_update_current_state_txn(...)` which handles deleting the rows if we are no + # longer in the room so we don't need to worry about inserting something that + # will be orphaned. + txn.execute_batch( + f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, bump_stamp) + VALUES ( + ?, ?, ? + ) + ON CONFLICT (room_id) + DO UPDATE SET + event_stream_ordering = CASE + WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering + THEN EXCLUDED.event_stream_ordering + ELSE event_stream_ordering + END, + bump_stamp = CASE + WHEN bump_stamp IS NULL OR bump_stamp < EXCLUDED.bump_stamp + THEN EXCLUDED.bump_stamp + ELSE bump_stamp + END + """, + [ + [ + room_id, + room_id_to_stream_ordering_map[room_id], + room_id_to_bump_stamp_map.get(room_id), + ] for room_id in room_id_to_stream_ordering_map.keys() ], ) - # This has to be separate from the upsert above because we won't have a - # `bump_stamp` for every event and we don't want to overwrite the existing value - # with `None`. 
- self.db_pool.simple_upsert_many_txn( - txn, - table="sliding_sync_joined_rooms", - key_names=("room_id",), - key_values=[(room_id,) for room_id in room_id_to_bump_stamp_map.keys()], - value_names=("bump_stamp",), - value_values=[ - (room_id_to_bump_stamp_map[room_id],) - for room_id in room_id_to_bump_stamp_map.keys() - ], - ) def _store_rejected_events_txn( self, From 0af3b4822c5a254f81458017764d7f35aa3cc9b9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 15:10:44 -0500 Subject: [PATCH 022/142] Refactor to `sliding_sync_membership_snapshots` --- synapse/_scripts/synapse_port_db.py | 2 +- synapse/storage/databases/main/events.py | 44 +-- .../delta/87/01_sliding_sync_memberships.sql | 21 +- tests/storage/test_events.py | 342 ++++++++++++++---- 4 files changed, 305 insertions(+), 104 deletions(-) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 95c6783905a..49088dc506e 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -130,7 +130,7 @@ "room_stats_state": ["is_federatable"], "rooms": ["is_public", "has_auth_chain_index"], "sliding_sync_joined_rooms": ["is_encrypted"], - "sliding_sync_non_join_memberships": ["is_encrypted"], + "sliding_sync_membership_snapshots": ["is_encrypted"], "users": ["shadow_banned", "approved", "locked", "suspended"], "un_partial_stated_event_stream": ["rejection_status_changed"], "users_who_share_rooms": ["share_private"], diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 267b66272a1..d71f054eca2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -45,7 +45,6 @@ from synapse.api.constants import ( EventContentFields, EventTypes, - Membership, RelationTypes, ) from synapse.api.errors import PartialStateConflictError @@ -1185,12 +1184,12 @@ def _update_current_state_txn( if ev_type == EventTypes.Member } - # We now update `sliding_sync_non_join_memberships`. + # We now update `sliding_sync_membership_snapshots`. # # This would only happen if someone was state reset out of the room if to_delete: txn.execute_batch( - "DELETE FROM sliding_sync_non_join_memberships" + "DELETE FROM sliding_sync_membership_snapshots" " WHERE room_id = ? AND user_id = ?", ( (room_id, state_key) @@ -1199,7 +1198,7 @@ def _update_current_state_txn( ), ) - # We handle `sliding_sync_non_join_memberships` before `current_state_events` so + # We handle `sliding_sync_membership_snapshots` before `current_state_events` so # we can gather the current state before it might be deleted if we are # `no_longer_in_room`. 
# @@ -1213,8 +1212,8 @@ def _update_current_state_txn( membership_event_id_to_user_id_map[event_id] = state_key[1] if len(membership_event_id_to_user_id_map) > 0: - # Map of values to insert/update in the `sliding_sync_non_join_memberships` table - sliding_sync_non_joined_rooms_insert_map: Dict[ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: Dict[ str, Optional[Union[str, bool]] ] = {} @@ -1285,7 +1284,7 @@ def _update_current_state_txn( room_type = event_json.get("content", {}).get( EventContentFields.ROOM_TYPE ) - sliding_sync_non_joined_rooms_insert_map["room_type"] = ( + sliding_sync_membership_snapshots_insert_map["room_type"] = ( room_type ) elif event_type == EventTypes.RoomEncryption: @@ -1293,14 +1292,14 @@ def _update_current_state_txn( EventContentFields.ENCRYPTION_ALGORITHM ) is_encrypted = encryption_algorithm is not None - sliding_sync_non_joined_rooms_insert_map["is_encrypted"] = ( + sliding_sync_membership_snapshots_insert_map["is_encrypted"] = ( is_encrypted ) elif event_type == EventTypes.Name: room_name = event_json.get("content", {}).get( EventContentFields.ROOM_NAME ) - sliding_sync_non_joined_rooms_insert_map["room_name"] = ( + sliding_sync_membership_snapshots_insert_map["room_name"] = ( room_name ) else: @@ -1308,26 +1307,22 @@ def _update_current_state_txn( f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" ) - # Update the `sliding_sync_non_join_memberships` table + # Update the `sliding_sync_membership_snapshots` table # # Pulling keys/values separately is safe and will produce congruent # lists - insert_keys = sliding_sync_non_joined_rooms_insert_map.keys() - insert_values = sliding_sync_non_joined_rooms_insert_map.values() + insert_keys = sliding_sync_membership_snapshots_insert_map.keys() + insert_values = sliding_sync_membership_snapshots_insert_map.values() txn.execute_batch( f""" - WITH data_table (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) AS ( - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in insert_values)} - ) - ) - INSERT INTO sliding_sync_non_join_memberships + INSERT INTO sliding_sync_membership_snapshots (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) - SELECT * FROM data_table - WHERE membership != ? + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?), + {", ".join("?" for _ in insert_values)} + ) ON CONFLICT (room_id, user_id) DO UPDATE SET membership_event_id = EXCLUDED.membership_event_id, @@ -1344,7 +1339,6 @@ def _update_current_state_txn( membership_event_id, ] + list(insert_values) - + [Membership.JOIN] for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ], ) @@ -2010,7 +2004,7 @@ def event_dict(event: EventBase) -> JsonDict: # longer in the room so we don't need to worry about inserting something that # will be orphaned. 
txn.execute_batch( - f""" + """ INSERT INTO sliding_sync_joined_rooms (room_id, event_stream_ordering, bump_stamp) VALUES ( diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index a60fabb5862..b1b249a2360 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -12,8 +12,8 @@ -- . -- We store the join memberships in a separate table from --- `sliding_sync_non_join_memberships` because the information can be shared across --- everyone who is joined. +-- `sliding_sync_membership_snapshots` because we need up-to-date information for joined +-- rooms and it can be shared across everyone who is joined. -- -- This table is kept in sync with `current_state_events` CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( @@ -31,19 +31,12 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( PRIMARY KEY (room_id) ); --- Store the user's non-join room memberships. Only stores the latest membership event --- for a given user which matches `local_current_membership` (except we don't store --- joins). --- --- FIXME: It might be easier to just store any membership here but just indicate that --- the state is a snapshot of the current state at the time of the membership event. --- That way we don't have to worry about clearing out --- `sliding_sync_non_join_memberships` when the the user joins a room. And we always --- have the full picture. Perhaps we can call it `sliding_sync_membership_snapshots`. +-- Store a snapshot of some state relevant for sliding sync for a user's room +-- membership. Only stores the latest membership event for a given user in a room. -- -- We don't include `bump_stamp` here because we can just use the `stream_ordering` from -- the membership event itself as the `bump_stamp`. 
-CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( +CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( room_id TEXT NOT NULL REFERENCES rooms(room_id), user_id TEXT NOT NULL, membership_event_id TEXT NOT NULL REFERENCES events(event_id), @@ -64,5 +57,5 @@ CREATE TABLE IF NOT EXISTS sliding_sync_non_join_memberships( PRIMARY KEY (room_id, user_id) ); -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_event_stream_ordering ON sliding_sync_non_join_memberships(event_stream_ordering); -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_non_join_memberships_membership_event_id ON sliding_sync_non_join_memberships(membership_event_id); +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream_ordering ON sliding_sync_membership_snapshots(event_stream_ordering); +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_membership_event_id ON sliding_sync_membership_snapshots(membership_event_id); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index b8302a79282..1cdcac2b8d1 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -499,7 +499,7 @@ class _SlidingSyncJoinedRoomResult: @attr.s(slots=True, frozen=True, auto_attribs=True) -class _SlidingSyncNonJoinMembershipResult: +class _SlidingSyncMembershipSnapshotResult: room_id: str user_id: str membership_event_id: str @@ -513,7 +513,7 @@ class _SlidingSyncNonJoinMembershipResult: class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): """ Tests to make sure the - `sliding_sync_joined_rooms`/`sliding_sync_non_join_memberships` database tables are + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` database tables are populated correctly. """ @@ -564,20 +564,20 @@ def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResu for row in rows } - def _get_sliding_sync_non_join_memberships( + def _get_sliding_sync_membership_snapshots( self, - ) -> Dict[Tuple[str, str], _SlidingSyncNonJoinMembershipResult]: + ) -> Dict[Tuple[str, str], _SlidingSyncMembershipSnapshotResult]: """ - Return the rows from the `sliding_sync_non_join_memberships` table. + Return the rows from the `sliding_sync_membership_snapshots` table. Returns: - Mapping from the (room_id, user_id) to _SlidingSyncNonJoinMembershipResult. + Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. 
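Because the membership event's own `stream_ordering` doubles as the bump stamp for non-join memberships, a room-list sort key can be chosen uniformly: joined rooms read `bump_stamp` from `sliding_sync_joined_rooms`, while invites, knocks, leaves and bans fall back to the snapshot's `event_stream_ordering`. A rough, illustrative sketch of that selection (not the actual sorting code):

    from typing import Optional

    def sort_key_for_room(
        membership: str,
        membership_event_stream_ordering: int,
        joined_room_bump_stamp: Optional[int],
    ) -> int:
        """Pick the value a sliding sync room list could sort on (newest first)."""
        if membership == "join" and joined_room_bump_stamp is not None:
            # Joined rooms have live, shared metadata in `sliding_sync_joined_rooms`.
            return joined_room_bump_stamp
        # Invites, knocks, leaves and bans sort by the membership event itself.
        return membership_event_stream_ordering

    # An invite sorts at the point the invite was received ...
    assert sort_key_for_room("invite", 7, None) == 7
    # ... while a joined room sorts at its latest bump event.
    assert sort_key_for_room("join", 3, 12) == 12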
""" rows = cast( List[Tuple[str, str, str, str, int, str, str, bool]], self.get_success( self.store.db_pool.simple_select_list( - "sliding_sync_non_join_memberships", + "sliding_sync_membership_snapshots", None, retcols=( "room_id", @@ -594,7 +594,7 @@ def _get_sliding_sync_non_join_memberships( ) return { - (row[0], row[1]): _SlidingSyncNonJoinMembershipResult( + (row[0], row[1]): _SlidingSyncMembershipSnapshotResult( room_id=row[0], user_id=row[1], membership_event_id=row[2], @@ -644,15 +644,32 @@ def test_joined_room_with_no_info(self) -> None: ), ) - # No one is non-joined to this room so we shouldn't see anything - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + }, exact=True, ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) def test_joined_room_with_info(self) -> None: """ @@ -680,10 +697,7 @@ def test_joined_room_with_info(self) -> None: ) # User1 joins the room - user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) - user1_join_event_pos = self.get_success( - self.store.get_position_for_event(user1_join_response["event_id"]) - ) + self.helper.join(room_id1, user1_id, tok=user1_tok) state_map = self.get_success( self.storage_controllers.state.get_current_state(room_id1) @@ -699,7 +713,10 @@ def test_joined_room_with_info(self) -> None: sliding_sync_joined_rooms_results[room_id1], _SlidingSyncJoinedRoomResult( room_id=room_id1, - event_stream_ordering=user1_join_event_pos.stream, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, bump_stamp=state_map[ (EventTypes.Create, "") ].internal_metadata.stream_ordering, @@ -709,14 +726,52 @@ def test_joined_room_with_info(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, exact=True, ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + # Holds the info according to the current state when the user joined + 
self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=None, + # Even though this room does have a name and is encrypted, user2 is the + # room creator and joined at the room creation time which didn't have + # this state set yet. + room_name=None, + is_encrypted=False, + ), + ) def test_joined_space_room_with_info(self) -> None: """ @@ -772,14 +827,51 @@ def test_joined_space_room_with_info(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (space_room_id, user1_id), + (space_room_id, user2_id), + }, exact=True, ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + # Even though this room does have a name, user2 is the room creator and + # joined at the room creation time which didn't have this state set yet. 
+ room_name=None, + is_encrypted=False, + ), + ) def test_joined_room_with_state_updated(self) -> None: """ @@ -830,12 +922,15 @@ def test_joined_room_with_state_updated(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, exact=True, ) @@ -878,14 +973,49 @@ def test_joined_room_with_state_updated(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, exact=True, ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) def test_joined_room_is_bumped(self) -> None: """ @@ -936,14 +1066,51 @@ def test_joined_room_is_bumped(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, exact=True, ) + # Holds the info according to the current state when the user joined + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + 
(EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) # Send a new message to bump the room event_response = self.helper.send(room_id1, "some message", tok=user1_tok) @@ -973,20 +1140,31 @@ def test_joined_room_is_bumped(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), - set(), + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, exact=True, ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + user1_snapshot, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) # TODO: test_joined_room_state_reset def test_non_join_space_room_with_info(self) -> None: """ - Test users who was invited shows up in `sliding_sync_non_join_memberships`. + Test users who was invited shows up in `sliding_sync_membership_snapshots`. """ user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1061,20 +1239,21 @@ def test_non_join_space_room_with_info(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), + set(sliding_sync_membership_snapshots_results.keys()), { (space_room_id, user1_id), + (space_room_id, user2_id), }, exact=True, ) # Holds the info according to the current state when the user was invited self.assertEqual( - sliding_sync_non_join_memberships_results.get((space_room_id, user1_id)), - _SlidingSyncNonJoinMembershipResult( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, membership_event_id=user1_invited_response["event_id"], @@ -1085,11 +1264,27 @@ def test_non_join_space_room_with_info(self) -> None: is_encrypted=True, ), ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name=None, + is_encrypted=False, + ), + ) def test_non_join_invite_ban(self) -> None: """ Test users who have invite/ban membership in room shows up in - `sliding_sync_non_join_memberships`. + `sliding_sync_membership_snapshots`. 
""" user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1144,20 +1339,22 @@ def test_non_join_invite_ban(self) -> None: ), ) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), + set(sliding_sync_membership_snapshots_results.keys()), { (room_id1, user1_id), + (room_id1, user2_id), (room_id1, user3_id), }, exact=True, ) + # Holds the info according to the current state when the user was invited self.assertEqual( - sliding_sync_non_join_memberships_results.get((room_id1, user1_id)), - _SlidingSyncNonJoinMembershipResult( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, membership_event_id=user1_invited_response["event_id"], @@ -1168,9 +1365,26 @@ def test_non_join_invite_ban(self) -> None: is_encrypted=False, ), ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user was banned self.assertEqual( - sliding_sync_non_join_memberships_results.get((room_id1, user3_id)), - _SlidingSyncNonJoinMembershipResult( + sliding_sync_membership_snapshots_results.get((room_id1, user3_id)), + _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user3_id, membership_event_id=user3_ban_response["event_id"], @@ -1189,7 +1403,7 @@ def test_non_join_invite_ban(self) -> None: def test_non_join_server_left_room(self) -> None: """ Test everyone local leaves the room but their leave membership still shows up in - `sliding_sync_non_join_memberships`. + `sliding_sync_membership_snapshots`. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1223,11 +1437,11 @@ def test_non_join_server_left_room(self) -> None: ) # We should still see rows for the leave events (non-joins) - sliding_sync_non_join_memberships_results = ( - self._get_sliding_sync_non_join_memberships() + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( - set(sliding_sync_non_join_memberships_results.keys()), + set(sliding_sync_membership_snapshots_results.keys()), { (room_id1, user1_id), (room_id1, user2_id), @@ -1235,8 +1449,8 @@ def test_non_join_server_left_room(self) -> None: exact=True, ) self.assertEqual( - sliding_sync_non_join_memberships_results.get((room_id1, user1_id)), - _SlidingSyncNonJoinMembershipResult( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, membership_event_id=user1_leave_response["event_id"], @@ -1248,8 +1462,8 @@ def test_non_join_server_left_room(self) -> None: ), ) self.assertEqual( - sliding_sync_non_join_memberships_results.get((room_id1, user2_id)), - _SlidingSyncNonJoinMembershipResult( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, membership_event_id=user2_leave_response["event_id"], From 552f8f496d0949493de786fca0fbbae5e8e24ea6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 15:43:06 -0500 Subject: [PATCH 023/142] Update descriptions --- .../delta/87/01_sliding_sync_memberships.sql | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index b1b249a2360..fc19e08c727 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -11,11 +11,15 @@ -- See the GNU Affero General Public License for more details: -- . --- We store the join memberships in a separate table from --- `sliding_sync_membership_snapshots` because we need up-to-date information for joined --- rooms and it can be shared across everyone who is joined. +-- A table for storing room meta data (current state relevant to sliding sync) that the +-- local server is still participating in (someone local is joined to the room). -- --- This table is kept in sync with `current_state_events` +-- We store the joined rooms in separate table from `sliding_sync_membership_snapshots` +-- because we need up-to-date information for joined rooms and it can be shared across +-- everyone who is joined. +-- +-- This table is kept in sync with `current_state_events` which means if the server is +-- no longer participating in a room, the row will be deleted. 
CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( room_id TEXT NOT NULL REFERENCES rooms(room_id), -- The `stream_ordering` of the latest event in the room @@ -28,11 +32,21 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( room_name TEXT, -- `m.room.encryption` -> `content.algorithm` (current state) is_encrypted BOOLEAN DEFAULT 0 NOT NULL, + -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms` + -- (tracked by https://github.com/element-hq/synapse/issues/17540) PRIMARY KEY (room_id) ); --- Store a snapshot of some state relevant for sliding sync for a user's room --- membership. Only stores the latest membership event for a given user in a room. +-- A table for storing a snapshot of room meta data (historical current state relevant +-- for sliding sync) at the time of a local user's membership. Only has rows for the +-- latest membership event for a given local user in a room which matches +-- `local_current_membership` . +-- +-- We store all memberships including joins. This makes it easy to reference this table +-- to find all membership for a given user and shares the same semantics as +-- `local_current_membership`. And we get to avoid some table maintenance; if we only +-- stored non-joins, we would have to delete the row for the user when the user joins +-- the room. -- -- We don't include `bump_stamp` here because we can just use the `stream_ordering` from -- the membership event itself as the `bump_stamp`. @@ -57,5 +71,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( PRIMARY KEY (room_id, user_id) ); +-- So we can fetch all rooms for a given user +CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_sync_membership_snapshots(user_id); +-- So we can sort by `stream_ordering CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream_ordering ON sliding_sync_membership_snapshots(event_stream_ordering); -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_membership_event_id ON sliding_sync_membership_snapshots(membership_event_id); From f069659343609cb7e725d90d847f3c7062a3d190 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 15:49:40 -0500 Subject: [PATCH 024/142] Fix lints --- synapse/storage/databases/main/events.py | 6 +--- .../delta/87/01_sliding_sync_memberships.sql | 4 +-- tests/storage/test_events.py | 28 +++++++++++++------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index d71f054eca2..ae43ad0fc66 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -42,11 +42,7 @@ from prometheus_client import Counter import synapse.metrics -from synapse.api.constants import ( - EventContentFields, - EventTypes, - RelationTypes, -) +from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index fc19e08c727..4e8fd0cd2dc 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS 
sliding_sync_joined_rooms( room_id TEXT NOT NULL REFERENCES rooms(room_id), -- The `stream_ordering` of the latest event in the room - event_stream_ordering BIGINT REFERENCES events(stream_ordering), + event_stream_ordering BIGINT NOT NULL REFERENCES events(stream_ordering), -- The `stream_ordering` of the last event according to the `bump_event_types` bump_stamp BIGINT, -- `m.room.create` -> `content.type` (current state) @@ -56,7 +56,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( membership_event_id TEXT NOT NULL REFERENCES events(event_id), membership TEXT NOT NULL, -- `stream_ordering` of the `membership_event_id` - event_stream_ordering BIGINT REFERENCES events(stream_ordering), + event_stream_ordering BIGINT NOT NULL REFERENCES events(stream_ordering), -- `m.room.create` -> `content.type` (according to the current state at the time of -- the membership) room_type TEXT, diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 1cdcac2b8d1..9a51baffab3 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -491,11 +491,16 @@ def test_room_remote_user_cache_invalidated(self) -> None: @attr.s(slots=True, frozen=True, auto_attribs=True) class _SlidingSyncJoinedRoomResult: room_id: str - event_stream_ordering: int - bump_stamp: int - room_type: str - room_name: str - is_encrypted: bool + # `event_stream_ordering` is only optional to allow easier semantics when we make + # expected objects from `event.internal_metadata.stream_ordering`. in the tests. + # `event.internal_metadata.stream_ordering` is marked optional because it only + # exists for persisted events but in the context of these tests, we're only working + # with persisted events and we're making comparisons so we will find any mismatch. + event_stream_ordering: Optional[int] + bump_stamp: Optional[int] + room_type: Optional[str] + room_name: Optional[str] + is_encrypted: Optional[bool] @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -504,10 +509,15 @@ class _SlidingSyncMembershipSnapshotResult: user_id: str membership_event_id: str membership: str - event_stream_ordering: int - room_type: str - room_name: str - is_encrypted: bool + # `event_stream_ordering` is only optional to allow easier semantics when we make + # expected objects from `event.internal_metadata.stream_ordering`. in the tests. + # `event.internal_metadata.stream_ordering` is marked optional because it only + # exists for persisted events but in the context of these tests, we're only working + # with persisted events and we're making comparisons so we will find any mismatch. 
+ event_stream_ordering: Optional[int] + room_type: Optional[str] + room_name: Optional[str] + is_encrypted: Optional[bool] class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): From 53232e6df53fd397d72fca753fa35a3b85b14f58 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 18:13:58 -0500 Subject: [PATCH 025/142] Fill in for remote invites (out of band, outlier membership) --- synapse/storage/databases/main/events.py | 134 +++++++++++++++++- .../delta/87/01_sliding_sync_memberships.sql | 14 +- tests/storage/test_events.py | 1 + 3 files changed, 145 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ae43ad0fc66..984a54bf2e2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -42,11 +42,17 @@ from prometheus_client import Counter import synapse.metrics -from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.constants import ( + EventContentFields, + EventTypes, + Membership, + RelationTypes, +) from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions -from synapse.events import EventBase, relation_from_event +from synapse.events import EventBase, StrippedStateEvent, relation_from_event from synapse.events.snapshot import EventContext +from synapse.events.utils import parse_stripped_state_event from synapse.logging.opentracing import trace from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -1312,11 +1318,12 @@ def _update_current_state_txn( txn.execute_batch( f""" INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering, {", ".join(insert_keys)}) + (room_id, user_id, membership_event_id, membership, event_stream_ordering, has_known_state, {", ".join(insert_keys)}) VALUES ( ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?), (SELECT stream_ordering FROM events WHERE event_id = ?), + ?, {", ".join("?" for _ in insert_values)} ) ON CONFLICT (room_id, user_id) @@ -1333,6 +1340,7 @@ def _update_current_state_txn( membership_event_id, membership_event_id, membership_event_id, + True, # has_known_state ] + list(insert_values) for membership_event_id, user_id in membership_event_id_to_user_id_map.items() @@ -2304,6 +2312,7 @@ def _store_room_members_txn( ) for event in events: + # Sanity check that we're working with persisted events assert event.internal_metadata.stream_ordering is not None # We update the local_current_membership table only if the event is @@ -2318,6 +2327,16 @@ def _store_room_members_txn( and event.internal_metadata.is_outlier() and event.internal_metadata.is_out_of_band_membership() ): + # The only sort of out-of-band-membership events we expect to see here + # are remote invites/knocks and LEAVE events corresponding to + # rejected/retracted invites and rescinded knocks. 
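With no current state to consult for these out-of-band memberships, the room metadata has to be recovered from the stripped state bundled with the invite or knock, as the code below this comment goes on to do. A condensed, hypothetical illustration of that derivation on plain dicts (simplified compared to the real parsing, which goes through `parse_stripped_state_event`):

    from typing import Any, Dict, List, Optional

    UNKNOWN_STATE = {
        "has_known_state": False,
        "room_type": None,
        "room_name": None,
        "is_encrypted": False,
    }

    def snapshot_from_stripped_state(
        stripped_events: Optional[List[Dict[str, Any]]],
    ) -> Dict[str, Any]:
        """Derive the snapshot columns from an `unsigned.invite_room_state` list."""
        if not stripped_events:
            # No stripped state at all: record that the room's state is unknown.
            return dict(UNKNOWN_STATE)

        state = {
            (ev["type"], ev.get("state_key", "")): ev.get("content", {})
            for ev in stripped_events
        }
        create_content = state.get(("m.room.create", ""))
        if create_content is None:
            # Without even a create event, treat the state as unknown.
            return dict(UNKNOWN_STATE)

        return {
            "has_known_state": True,
            "room_type": create_content.get("type"),
            "room_name": state.get(("m.room.name", ""), {}).get("name"),
            "is_encrypted": state.get(("m.room.encryption", ""), {}).get("algorithm") is not None,
        }

    assert snapshot_from_stripped_state(None)["has_known_state"] is False
    assert snapshot_from_stripped_state(
        [
            {"type": "m.room.create", "state_key": "", "content": {"creator": "@inviter:remote"}},
            {"type": "m.room.encryption", "state_key": "", "content": {"algorithm": "m.megolm.v1.aes-sha2"}},
        ]
    ) == {"has_known_state": True, "room_type": None, "room_name": None, "is_encrypted": True}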
+ assert event.type == EventTypes.Member + assert event.membership in ( + Membership.INVITE, + Membership.KNOCK, + Membership.LEAVE, + ) + self.db_pool.simple_upsert_txn( txn, table="local_current_membership", @@ -2329,6 +2348,115 @@ def _store_room_members_txn( }, ) + # Update the `sliding_sync_membership_snapshots` table + # + raw_stripped_state_events = None + if event.membership == Membership.INVITE: + invite_room_state = event.unsigned.get("invite_room_state") + raw_stripped_state_events = invite_room_state + elif event.membership == Membership.KNOCK: + knock_room_state = event.unsigned.get("knock_room_state") + raw_stripped_state_events = knock_room_state + + insert_values = { + "membership_event_id": event.event_id, + "membership": event.membership, + "event_stream_ordering": event.internal_metadata.stream_ordering, + } + if raw_stripped_state_events is not None: + stripped_state_map: MutableStateMap[StrippedStateEvent] = {} + if isinstance(raw_stripped_state_events, list): + for raw_stripped_event in raw_stripped_state_events: + stripped_state_event = parse_stripped_state_event( + raw_stripped_event + ) + if stripped_state_event is not None: + stripped_state_map[ + ( + stripped_state_event.type, + stripped_state_event.state_key, + ) + ] = stripped_state_event + + # If there is some stripped state, we assume the remote server passed *all* + # of the potential stripped state events for the room. + create_stripped_event = stripped_state_map.get( + (EventTypes.Create, "") + ) + # Sanity check that we at-least have the create event + if create_stripped_event is not None: + # Find the room_type + insert_values["room_type"] = ( + create_stripped_event.content.get( + EventContentFields.ROOM_TYPE + ) + if create_stripped_event is not None + else None + ) + + # Find whether the room is_encrypted + encryption_stripped_event = stripped_state_map.get( + (EventTypes.RoomEncryption, "") + ) + encryption = ( + encryption_stripped_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + if encryption_stripped_event is not None + else None + ) + insert_values["is_encrypted"] = encryption is not None + + # Find the room_name + room_name_stripped_event = stripped_state_map.get( + (EventTypes.Name, "") + ) + insert_values["room_name"] = ( + room_name_stripped_event.content.get( + EventContentFields.ROOM_NAME + ) + if room_name_stripped_event is not None + else None + ) + + else: + # No strip state provided + insert_values["has_known_state"] = False + insert_values["room_type"] = None + insert_values["room_name"] = None + insert_values["is_encrypted"] = False + else: + if event.membership == Membership.LEAVE: + # Inherit the meta data from the remote invite/knock. When using + # sliding sync filters, this will prevent the room from + # disappearing/appearing just because you left the room. + pass + elif event.membership in (Membership.INVITE, Membership.KNOCK): + # No strip state provided + insert_values["has_known_state"] = False + insert_values["room_type"] = None + insert_values["room_name"] = None + insert_values["is_encrypted"] = False + else: + # We don't know how to handle this type of membership yet + # + # FIXME: We should use `assert_never` here but for some reason + # the exhaustive matching doesn't recognize the `Never` here. 
+ # assert_never(event.membership) + raise AssertionError( + f"Unexpected out-of-band membership {event.membership} ({event.event_id}) that we don't know how to handle yet" + ) + + self.db_pool.simple_upsert_txn( + txn, + table="sliding_sync_membership_snapshots", + keyvalues={ + "room_id": event.room_id, + "user_id": event.state_key, + }, + values=insert_values, + ) + def _handle_event_relations( self, txn: LoggingTransaction, event: EventBase ) -> None: diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 4e8fd0cd2dc..81f5e271f36 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -46,7 +46,14 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( -- to find all membership for a given user and shares the same semantics as -- `local_current_membership`. And we get to avoid some table maintenance; if we only -- stored non-joins, we would have to delete the row for the user when the user joins --- the room. +-- the room. Stripped state doesn't include the `m.room.tombstone` event, so we just +-- assume that the room doesn't have a tombstone. +-- +-- For remote invite/knocks where the server is not participating in the room, we will +-- use stripped state events to populate this table. We assume that if any stripped +-- state is given, it will include all possible stripped state events types. For +-- example, if stripped state is given but `m.room.encryption` isn't included, we will +-- assume that the room is not encrypted. -- -- We don't include `bump_stamp` here because we can just use the `stream_ordering` from -- the membership event itself as the `bump_stamp`. @@ -57,6 +64,11 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( membership TEXT NOT NULL, -- `stream_ordering` of the `membership_event_id` event_stream_ordering BIGINT NOT NULL REFERENCES events(stream_ordering), + -- For remote invites/knocks that don't include any stripped state, we want to be + -- able to distinguish between a room with `None` as valid value for some state and + -- room where the state is completely unknown. Basically, this should be True unless + -- no stripped state was provided for a remote invite/knock (False). 
+ has_known_state BOOLEAN DEFAULT 0 NOT NULL, -- `m.room.create` -> `content.type` (according to the current state at the time of -- the membership) room_type TEXT, diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 9a51baffab3..19b17fe4c8f 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1407,6 +1407,7 @@ def test_non_join_invite_ban(self) -> None: ) # TODO: Test remote invite + # TODO: Test rejection of a remote invite # TODO Test for non-join membership changing From ab074f5335c0a7c2dbbafbd9dccb717e94d0b262 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 19:40:53 -0500 Subject: [PATCH 026/142] Fix events from rooms we're not joined to affecting the joined room stream ordering --- synapse/storage/databases/main/events.py | 145 ++++++++------ tests/storage/test_events.py | 229 ++++++++++++++++++++++- 2 files changed, 305 insertions(+), 69 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 984a54bf2e2..95bf0ce660a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -609,6 +609,10 @@ def _persist_events_txn( txn, room_id, state_delta_for_room, min_stream_order ) + self._update_sliding_sync_tables_with_new_persisted_events_txn( + txn, events_and_contexts + ) + def _persist_event_auth_chain_txn( self, txn: LoggingTransaction, @@ -1631,6 +1635,84 @@ def _update_current_state_txn( txn, {m for m in members_to_cache_bust if not self.hs.is_mine_id(m)} ) + def _update_sliding_sync_tables_with_new_persisted_events_txn( + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> None: + """ + Update the latest `event_stream_ordering`/`bump_stamp` columns in the + `sliding_sync_joined_rooms` table for the room with new events. + + This function assumes that `_store_event_txn()` (to persist the event) and + `_update_current_state_txn(...)` (so that `sliding_sync_joined_rooms` table has + been updated with rooms that were joined) have already been run. 
+ + Args: + txn + events_and_contexts: The events being persisted + """ + + # Handle updating `sliding_sync_joined_rooms` + room_id_to_stream_ordering_map: Dict[str, int] = {} + room_id_to_bump_stamp_map: Dict[str, int] = {} + for event, _ in events_and_contexts: + existing_stream_ordering = room_id_to_stream_ordering_map.get(event.room_id) + # This should exist for persisted events + assert event.internal_metadata.stream_ordering is not None + + # Ignore backfilled events which will have a negative stream ordering + if event.internal_metadata.stream_ordering < 0: + continue + + if ( + existing_stream_ordering is None + or existing_stream_ordering < event.internal_metadata.stream_ordering + ): + room_id_to_stream_ordering_map[event.room_id] = ( + event.internal_metadata.stream_ordering + ) + + if event.type in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES: + existing_bump_stamp = room_id_to_bump_stamp_map.get(event.room_id) + # This should exist at this point because we're inserting events here which require it + assert event.internal_metadata.stream_ordering is not None + if ( + existing_bump_stamp is None + or existing_bump_stamp < event.internal_metadata.stream_ordering + ): + room_id_to_bump_stamp_map[event.room_id] = ( + event.internal_metadata.stream_ordering + ) + + txn.execute_batch( + """ + UPDATE sliding_sync_joined_rooms + SET + event_stream_ordering = CASE + WHEN event_stream_ordering IS NULL OR event_stream_ordering < ? + THEN ? + ELSE event_stream_ordering + END, + bump_stamp = CASE + WHEN bump_stamp IS NULL OR bump_stamp < ? + THEN ? + ELSE bump_stamp + END + WHERE room_id = ? + """, + [ + [ + room_id_to_stream_ordering_map[room_id], + room_id_to_stream_ordering_map[room_id], + room_id_to_bump_stamp_map.get(room_id), + room_id_to_bump_stamp_map.get(room_id), + room_id, + ] + for room_id in room_id_to_stream_ordering_map.keys() + ], + ) + def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state events. @@ -1976,67 +2058,6 @@ def event_dict(event: EventBase) -> JsonDict: ], ) - # Handle updating `sliding_sync_joined_rooms` - room_id_to_stream_ordering_map: Dict[str, int] = {} - room_id_to_bump_stamp_map: Dict[str, int] = {} - for event, _ in events_and_contexts: - existing_stream_ordering = room_id_to_stream_ordering_map.get(event.room_id) - # This should exist at this point because we're inserting events here which require it - assert event.internal_metadata.stream_ordering is not None - if ( - existing_stream_ordering is None - or existing_stream_ordering < event.internal_metadata.stream_ordering - ): - room_id_to_stream_ordering_map[event.room_id] = ( - event.internal_metadata.stream_ordering - ) - - if event.type in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES: - existing_bump_stamp = room_id_to_bump_stamp_map.get(event.room_id) - # This should exist at this point because we're inserting events here which require it - assert event.internal_metadata.stream_ordering is not None - if ( - existing_bump_stamp is None - or existing_bump_stamp < event.internal_metadata.stream_ordering - ): - room_id_to_bump_stamp_map[event.room_id] = ( - event.internal_metadata.stream_ordering - ) - - # This function (`_store_event_txn(...)`) is run before - # `_update_current_state_txn(...)` which handles deleting the rows if we are no - # longer in the room so we don't need to worry about inserting something that - # will be orphaned. 
- txn.execute_batch( - """ - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, bump_stamp) - VALUES ( - ?, ?, ? - ) - ON CONFLICT (room_id) - DO UPDATE SET - event_stream_ordering = CASE - WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering - THEN EXCLUDED.event_stream_ordering - ELSE event_stream_ordering - END, - bump_stamp = CASE - WHEN bump_stamp IS NULL OR bump_stamp < EXCLUDED.bump_stamp - THEN EXCLUDED.bump_stamp - ELSE bump_stamp - END - """, - [ - [ - room_id, - room_id_to_stream_ordering_map[room_id], - room_id_to_bump_stamp_map.get(room_id), - ] - for room_id in room_id_to_stream_ordering_map.keys() - ], - ) - def _store_rejected_events_txn( self, txn: LoggingTransaction, @@ -2385,6 +2406,8 @@ def _store_room_members_txn( ) # Sanity check that we at-least have the create event if create_stripped_event is not None: + insert_values["has_known_state"] = True + # Find the room_type insert_values["room_type"] = ( create_stripped_event.content.get( diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 19b17fe4c8f..fa9e7717fab 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -28,7 +28,8 @@ from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes from synapse.api.room_versions import RoomVersions -from synapse.events import EventBase +from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict +from synapse.events.snapshot import EventContext from synapse.federation.federation_base import event_from_pdu_json from synapse.rest import admin from synapse.rest.client import login, room @@ -500,7 +501,7 @@ class _SlidingSyncJoinedRoomResult: bump_stamp: Optional[int] room_type: Optional[str] room_name: Optional[str] - is_encrypted: Optional[bool] + is_encrypted: bool @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -515,9 +516,10 @@ class _SlidingSyncMembershipSnapshotResult: # exists for persisted events but in the context of these tests, we're only working # with persisted events and we're making comparisons so we will find any mismatch. event_stream_ordering: Optional[int] + has_known_state: bool room_type: Optional[str] room_name: Optional[str] - is_encrypted: Optional[bool] + is_encrypted: bool class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): @@ -569,7 +571,7 @@ def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResu bump_stamp=row[2], room_type=row[3], room_name=row[4], - is_encrypted=row[5], + is_encrypted=bool(row[5]), ) for row in rows } @@ -584,7 +586,7 @@ def _get_sliding_sync_membership_snapshots( Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. 
""" rows = cast( - List[Tuple[str, str, str, str, int, str, str, bool]], + List[Tuple[str, str, str, str, int, bool, str, str, bool]], self.get_success( self.store.db_pool.simple_select_list( "sliding_sync_membership_snapshots", @@ -595,6 +597,7 @@ def _get_sliding_sync_membership_snapshots( "membership_event_id", "membership", "event_stream_ordering", + "has_known_state", "room_type", "room_name", "is_encrypted", @@ -610,13 +613,88 @@ def _get_sliding_sync_membership_snapshots( membership_event_id=row[2], membership=row[3], event_stream_ordering=row[4], - room_type=row[5], - room_name=row[6], - is_encrypted=row[7], + has_known_state=bool(row[5]), + room_type=row[6], + room_name=row[7], + is_encrypted=bool(row[8]), ) for row in rows } + _remote_invite_count: int = 0 + + def _create_remote_invite_room_for_user( + self, + invitee_user_id: str, + unsigned_invite_room_state: Optional[List[StrippedStateEvent]], + ) -> Tuple[str, EventBase]: + """ + Create a fake invite for a remote room and persist it. + + We don't have any state for these kind of rooms and can only rely on the + stripped state included in the unsigned portion of the invite event to identify + the room. + + Args: + invitee_user_id: The person being invited + unsigned_invite_room_state: List of stripped state events to assist the + receiver in identifying the room. + + Returns: + The room ID of the remote invite room and the persisted remote invite event. + """ + invite_room_id = f"!test_room{self._remote_invite_count}:remote_server" + + invite_event_dict = { + "room_id": invite_room_id, + "sender": "@inviter:remote_server", + "state_key": invitee_user_id, + "depth": 1, + "origin_server_ts": 1, + "type": EventTypes.Member, + "content": {"membership": Membership.INVITE}, + "auth_events": [], + "prev_events": [], + } + if unsigned_invite_room_state is not None: + serialized_stripped_state_events = [] + for stripped_event in unsigned_invite_room_state: + serialized_stripped_state_events.append( + { + "type": stripped_event.type, + "state_key": stripped_event.state_key, + "sender": stripped_event.sender, + "content": stripped_event.content, + } + ) + + invite_event_dict["unsigned"] = { + "invite_room_state": serialized_stripped_state_events + } + + invite_event = make_event_from_dict( + invite_event_dict, + room_version=RoomVersions.V10, + ) + invite_event.internal_metadata.outlier = True + invite_event.internal_metadata.out_of_band_membership = True + + self.get_success( + self.store.maybe_store_room_on_outlier_membership( + room_id=invite_room_id, room_version=invite_event.room_version + ) + ) + context = EventContext.for_outlier(self.hs.get_storage_controllers()) + persist_controller = self.hs.get_storage_controllers().persistence + assert persist_controller is not None + persisted_event, _, _ = self.get_success( + persist_controller.persist_event(invite_event, context) + ) + + self._remote_invite_count += 1 + + return invite_room_id, persisted_event + def test_joined_room_with_no_info(self) -> None: """ Test joined room that doesn't have a room type, encryption, or name shows up in @@ -675,6 +753,7 @@ def test_joined_room_with_no_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user1_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, @@ -758,6 +837,7 @@ def test_joined_room_with_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user1_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, 
room_name="my super duper room", is_encrypted=True, @@ -774,6 +854,7 @@ def test_joined_room_with_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, # Even though this room does have a name and is encrypted, user2 is the # room creator and joined at the room creation time which didn't have @@ -859,6 +940,7 @@ def test_joined_space_room_with_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user1_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, @@ -875,6 +957,7 @@ def test_joined_space_room_with_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=RoomTypes.SPACE, # Even though this room does have a name, user2 is the room creator and # joined at the room creation time which didn't have this state set yet. @@ -1005,6 +1088,7 @@ def test_joined_room_with_state_updated(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user1_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name="my super duper room", is_encrypted=False, @@ -1021,6 +1105,7 @@ def test_joined_room_with_state_updated(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, @@ -1096,6 +1181,7 @@ def test_joined_room_is_bumped(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user1_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name="my super duper room", is_encrypted=False, @@ -1113,6 +1199,7 @@ def test_joined_room_is_bumped(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, @@ -1269,6 +1356,7 @@ def test_non_join_space_room_with_info(self) -> None: membership_event_id=user1_invited_response["event_id"], membership=Membership.INVITE, event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=True, @@ -1285,6 +1373,7 @@ def test_non_join_space_room_with_info(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=RoomTypes.SPACE, room_name=None, is_encrypted=False, @@ -1370,6 +1459,7 @@ def test_non_join_invite_ban(self) -> None: membership_event_id=user1_invited_response["event_id"], membership=Membership.INVITE, event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, @@ -1386,6 +1476,7 @@ def test_non_join_invite_ban(self) -> None: event_stream_ordering=state_map[ (EventTypes.Member, user2_id) ].internal_metadata.stream_ordering, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, @@ -1400,12 +1491,132 @@ def test_non_join_invite_ban(self) -> None: membership_event_id=user3_ban_response["event_id"], membership=Membership.BAN, event_stream_ordering=user3_ban_event_pos.stream, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, ), ) + def test_non_join_remote_invite_no_stripped_state(self) -> None: + """ + Test remote invite 
with no stripped state provided shows up in + `sliding_sync_membership_snapshots` with `has_known_state=False`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room without any `unsigned.invite_room_state` + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user(user1_id, None) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, + # No stripped state provided + has_known_state=False, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + + def test_non_join_remote_invite_encrypted_room(self) -> None: + """ + Test remote invite with stripped state (encrypted room) shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + ), + ) + # TODO: Test remote invite # TODO: Test rejection of a remote invite @@ -1467,6 +1678,7 @@ def test_non_join_server_left_room(self) -> None: membership_event_id=user1_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user1_leave_event_pos.stream, + has_known_state=True, 
room_type=None, room_name=None, is_encrypted=False, @@ -1480,6 +1692,7 @@ def test_non_join_server_left_room(self) -> None: membership_event_id=user2_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user2_leave_event_pos.stream, + has_known_state=True, room_type=None, room_name=None, is_encrypted=False, From 3e1f24ea11367743a91004f741f399a0b1e7c1ff Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 19:46:23 -0500 Subject: [PATCH 027/142] User ID is not unique because user is joined to many rooms --- .../schema/main/delta/87/01_sliding_sync_memberships.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 81f5e271f36..eb31cc7aff2 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -84,6 +84,6 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( ); -- So we can fetch all rooms for a given user -CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_sync_membership_snapshots(user_id); +CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_sync_membership_snapshots(user_id); -- So we can sort by `stream_ordering CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream_ordering ON sliding_sync_membership_snapshots(event_stream_ordering); From 83a5858083acbf357bfb65a0886e52937145de5b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 19:57:28 -0500 Subject: [PATCH 028/142] Add tests for remote invites --- tests/storage/test_events.py | 266 ++++++++++++++++++++++++++++------- 1 file changed, 213 insertions(+), 53 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index fa9e7717fab..117f9a2d092 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -24,6 +24,7 @@ import attr +from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes @@ -1498,7 +1499,94 @@ def test_non_join_invite_ban(self) -> None: ), ) - def test_non_join_remote_invite_no_stripped_state(self) -> None: + def test_non_join_server_left_room(self) -> None: + """ + Test everyone local leaves the room but their leave membership still shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # User2 leaves the room + user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_event_pos = self.get_success( + self.store.get_position_for_event(user2_leave_response["event_id"]) + ) + + # User1 leaves the room + user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_event_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one is joined to the room anymore so we shouldn't have an entry in the + # `sliding_sync_joined_rooms` table. 
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # We should still see rows for the leave events (non-joins) + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + + @parameterized.expand( + [ + # No stripped state provided + ("none", None), + # Empty stripped state provided + ("empty", []), + ] + ) + def test_non_join_remote_invite_no_stripped_state( + self, _description: str, stripped_state: Optional[List[StrippedStateEvent]] + ) -> None: """ Test remote invite with no stripped state provided shows up in `sliding_sync_membership_snapshots` with `has_known_state=False`. @@ -1508,7 +1596,7 @@ def test_non_join_remote_invite_no_stripped_state(self) -> None: # Create a remote invite room without any `unsigned.invite_room_state` remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user(user1_id, None) + self._create_remote_invite_room_for_user(user1_id, stripped_state) ) # No one local is joined to the remote room @@ -1547,9 +1635,9 @@ def test_non_join_remote_invite_no_stripped_state(self) -> None: ), ) - def test_non_join_remote_invite_encrypted_room(self) -> None: + def test_non_join_remote_invite_unencrypted_room(self) -> None: """ - Test remote invite with stripped state (encrypted room) shows up in + Test remote invite with stripped state (unencrypted room) shows up in `sliding_sync_membership_snapshots`. 
""" user1_id = self.register_user("user1", "pass") @@ -1571,11 +1659,11 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: }, ), StrippedStateEvent( - type=EventTypes.RoomEncryption, + type=EventTypes.Name, state_key="", sender="@inviter:remote_server", content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + EventContentFields.ROOM_NAME: "my super duper room", }, ), ], @@ -1612,45 +1700,47 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, has_known_state=True, room_type=None, - room_name=None, - is_encrypted=True, + room_name="my super duper room", + is_encrypted=False, ), ) - # TODO: Test remote invite - # TODO: Test rejection of a remote invite - - # TODO Test for non-join membership changing - - def test_non_join_server_left_room(self) -> None: + def test_non_join_remote_invite_encrypted_room(self) -> None: """ - Test everyone local leaves the room but their leave membership still shows up in + Test remote invite with stripped state (encrypted room) shows up in `sliding_sync_membership_snapshots`. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) - - # User2 leaves the room - user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) - user2_leave_event_pos = self.get_success( - self.store.get_position_for_event(user2_leave_response["event_id"]) - ) + _user1_tok = self.login(user1_id, "pass") - # User1 leaves the room - user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) - user1_leave_event_pos = self.get_success( - self.store.get_position_for_event(user1_leave_response["event_id"]) + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) ) - # No one is joined to the room anymore so we shouldn't have an entry in the - # `sliding_sync_joined_rooms` table. 
+ # No one local is joined to the remote room sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), @@ -1658,45 +1748,115 @@ def test_non_join_server_left_room(self) -> None: exact=True, ) - # We should still see rows for the leave events (non-joins) sliding_sync_membership_snapshots_results = ( self._get_sliding_sync_membership_snapshots() ) self.assertIncludes( set(sliding_sync_membership_snapshots_results.keys()), { - (room_id1, user1_id), - (room_id1, user2_id), + (remote_invite_room_id, user1_id), }, exact=True, ) self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, + room_id=remote_invite_room_id, user_id=user1_id, - membership_event_id=user1_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user1_leave_event_pos.stream, + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, has_known_state=True, room_type=None, room_name=None, - is_encrypted=False, + is_encrypted=True, ), ) + + def test_non_join_remote_invite_space_room(self) -> None: + """ + Test remote invite with stripped state (encrypted space room with name) shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + # Specify that it is a space room + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - membership_event_id=user2_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user2_leave_event_pos.stream, + room_id=remote_invite_room_id, + user_id=user1_id, + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + 
event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=True, ), ) + # TODO: Test rejection of a remote invite + + # TODO Test for non-join membership changing + # TODO: test_non_join_state_reset From 5589ae48ca99c59833ea4a7116a798ef44dacf72 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 20:14:14 -0500 Subject: [PATCH 029/142] Add test for remote invite rejected/retracted --- tests/storage/test_events.py | 211 ++++++++++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 1 deletion(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 117f9a2d092..3cf459d8bf2 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -696,6 +696,60 @@ def _create_remote_invite_room_for_user( return invite_room_id, persisted_event + def _retract_remote_invite_for_user( + self, + user_id: str, + remote_room_id: str, + ) -> EventBase: + """ + Create a fake invite retraction for a remote room and persist it. + + Retracting an invite just means the person is no longer invited to the room. + This is done by someone with proper power levels kicking the user from the room. + A kick shows up as a leave event for a given person with a different `sender`. + + Args: + user_id: The person who was invited and we're going to retract the + invite for. + remote_room_id: The room ID that the invite was for. + + Returns: + The persisted leave (kick) event. + """ + + kick_event_dict = { + "room_id": remote_room_id, + "sender": "@inviter:remote_server", + "state_key": user_id, + "depth": 1, + "origin_server_ts": 1, + "type": EventTypes.Member, + "content": {"membership": Membership.LEAVE}, + "auth_events": [], + "prev_events": [], + } + + kick_event = make_event_from_dict( + kick_event_dict, + room_version=RoomVersions.V10, + ) + kick_event.internal_metadata.outlier = True + kick_event.internal_metadata.out_of_band_membership = True + + self.get_success( + self.store.maybe_store_room_on_outlier_membership( + room_id=remote_room_id, room_version=kick_event.room_version + ) + ) + context = EventContext.for_outlier(self.hs.get_storage_controllers()) + persist_controller = self.hs.get_storage_controllers().persistence + assert persist_controller is not None + persisted_event, _, _ = self.get_success( + persist_controller.persist_event(kick_event, context) + ) + + return persisted_event + def test_joined_room_with_no_info(self) -> None: """ Test joined room that doesn't have a room type, encryption, or name shows up in @@ -1855,7 +1909,162 @@ def test_non_join_remote_invite_space_room(self) -> None: ), ) - # TODO: Test rejection of a remote invite + def test_non_join_rejected_remote_invite(self) -> None: + """ + Test rejected remote invite (user decided to leave the room) inherits meta data + from when the remote invite stripped state and shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. 
+ remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + # User1 decides to leave the room (reject the invite) + user1_leave_response = self.helper.leave( + remote_invite_room_id, user1_id, tok=user1_tok + ) + user1_leave_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + ), + ) + + def test_non_join_retracted_remote_invite(self) -> None: + """ + Test retracted remote invite (Remote inviter kicks the person who was invited) + inherits meta data from when the remote invite stripped state and shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + # `@inviter:remote_server` decides to retract the invite (kicks the user). 
+ # (Note: A kick is just a leave event with a different sender) + remote_invite_retraction_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=remote_invite_room_id, + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + membership_event_id=remote_invite_retraction_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=remote_invite_retraction_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + ), + ) # TODO Test for non-join membership changing From 3423eb72d553e28ad5e69f2c06d291a76f34b017 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 20:29:58 -0500 Subject: [PATCH 030/142] Add test to make sure snapshot evolves with membership --- tests/storage/test_events.py | 237 ++++++++++++++++++++++++++++++++++- 1 file changed, 235 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 3cf459d8bf2..110163dddaa 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1553,6 +1553,241 @@ def test_non_join_invite_ban(self) -> None: ), ) + def test_membership_changing(self) -> None: + """ + Test latest membership change is reflected in `sliding_sync_membership_snapshots`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + # ====================================================== + user1_invited_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # Update the room name after the user was invited + room_name_update_response = self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + room_name_update_event_pos = self.get_success( + self.store.get_position_for_event(room_name_update_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=room_name_update_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was invited + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, + room_type=None, + # Room name was updated after the user was invited so we should still + # see it unset here + room_name=None, + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + # User1 joins the room + # ====================================================== + user1_joined_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_joined_event_pos = self.get_success( + self.store.get_position_for_event(user1_joined_response["event_id"]) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + 
sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=user1_joined_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_joined_response["event_id"], + membership=Membership.JOIN, + event_stream_ordering=user1_joined_event_pos.stream, + has_known_state=True, + room_type=None, + # We see the update state because the user joined after the room name + # change + room_name="my super duper room", + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + # User1 is banned from the room + # ====================================================== + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_ban_event_pos = self.get_success( + self.store.get_position_for_event(user1_ban_response["event_id"]) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=user1_ban_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was banned + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_ban_response["event_id"], + membership=Membership.BAN, + event_stream_ordering=user1_ban_event_pos.stream, + has_known_state=True, + room_type=None, + # We see the update state because the user joined after the room name + # change + room_name="my super duper room", + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + def test_non_join_server_left_room(self) -> None: """ Test everyone local leaves the room but their leave membership still shows up in @@ -2066,6 +2301,4 @@ def test_non_join_retracted_remote_invite(self) -> 
None: ), ) - # TODO Test for non-join membership changing - # TODO: test_non_join_state_reset From f600eacd0d5d954c408cce868ac31c9ecffc3b5f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 20:30:48 -0500 Subject: [PATCH 031/142] Adjust test description --- tests/storage/test_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 110163dddaa..a1063ac6f08 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1555,7 +1555,7 @@ def test_non_join_invite_ban(self) -> None: def test_membership_changing(self) -> None: """ - Test latest membership change is reflected in `sliding_sync_membership_snapshots`. + Test latest snapshot evolves when membership changes (`sliding_sync_membership_snapshots`). """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") From 517946d940bd72925e43857e5b87b5ac0a3f20b5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Aug 2024 20:31:25 -0500 Subject: [PATCH 032/142] Fix lints --- tests/storage/test_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index a1063ac6f08..acc84f4231c 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -23,8 +23,8 @@ from typing import Dict, List, Optional, Tuple, cast import attr - from parameterized import parameterized + from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes @@ -2230,7 +2230,7 @@ def test_non_join_retracted_remote_invite(self) -> None: `sliding_sync_membership_snapshots`. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") # Create a remote invite room with some `unsigned.invite_room_state` # indicating that the room is encrypted. From eb3a185cfca51c4b46741f1f351d064643a5bf94 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 12:24:53 -0500 Subject: [PATCH 033/142] Fix federating backfill test --- synapse/storage/databases/main/events.py | 7 --- .../client/sliding_sync/test_rooms_meta.py | 47 +++++++++++++++---- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 95bf0ce660a..222001807bf 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1172,13 +1172,6 @@ def _update_current_state_txn( to_delete = delta_state.to_delete to_insert = delta_state.to_insert - logger.info( - "asdf _update_current_state_txn no_longer_in_room=%s to_insert=%s to_delete=%s", - delta_state.no_longer_in_room, - to_insert, - to_delete, - ) - # Figure out the changes of membership to invalidate the # `get_rooms_for_user` cache. 
# We find out which membership events we may have deleted diff --git a/tests/rest/client/sliding_sync/test_rooms_meta.py b/tests/rest/client/sliding_sync/test_rooms_meta.py index 04f11c05241..d9050c1fe77 100644 --- a/tests/rest/client/sliding_sync/test_rooms_meta.py +++ b/tests/rest/client/sliding_sync/test_rooms_meta.py @@ -16,7 +16,7 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.rest.client import login, room, sync from synapse.server import HomeServer @@ -44,6 +44,7 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.storage_controllers = hs.get_storage_controllers() + self.state_handler = self.hs.get_state_handler() def test_rooms_meta_when_joined(self) -> None: """ @@ -607,9 +608,10 @@ def test_rooms_bump_stamp_backfill(self) -> None: # Create a remote room creator = "@user:other" room_id = "!foo:other" + room_version = RoomVersions.V10 shared_kwargs = { "room_id": room_id, - "room_version": "10", + "room_version": room_version.identifier, } create_tuple = self.get_success( @@ -618,6 +620,12 @@ def test_rooms_bump_stamp_backfill(self) -> None: prev_event_ids=[], type=EventTypes.Create, state_key="", + content={ + # The `ROOM_CREATOR` field could be removed if we used a room + # version > 10 (in favor of relying on `sender`) + EventContentFields.ROOM_CREATOR: creator, + EventContentFields.ROOM_VERSION: room_version.identifier, + }, sender=creator, **shared_kwargs, ) @@ -667,9 +675,7 @@ def test_rooms_bump_stamp_backfill(self) -> None: ] # Ensure the local HS knows the room version - self.get_success( - self.store.store_room(room_id, creator, False, RoomVersions.V10) - ) + self.get_success(self.store.store_room(room_id, creator, False, room_version)) # Persist these events as backfilled events. persistence = self.hs.get_storage_controllers().persistence @@ -678,11 +684,21 @@ def test_rooms_bump_stamp_backfill(self) -> None: for event, context in remote_events_and_contexts: self.get_success(persistence.persist_event(event, context, backfilled=True)) - # Now we join the local user to the room - join_tuple = self.get_success( + # Now we join the local user to the room. We want to make this feel as close to + # the real `process_remote_join()` as possible but we'd like to avoid some of + # the auth checks that would be done in the real code. + # + # FIXME: The test was originally written using this less-real shortcut but it + # would be nice to use the real remote join process in a + # `FederatingHomeserverTestCase`. + flawed_join_tuple = self.get_success( create_event( self.hs, prev_event_ids=[invite_tuple[0].event_id], + # This doesn't work correctly to create an `EventContext` that includes + # both of these state events. I assume it's because we're working on our + # local homeserver which has the remote state set as `outlier`. We have + # to create our own EventContext below to get this right. auth_event_ids=[create_tuple[0].event_id, invite_tuple[0].event_id], type=EventTypes.Member, state_key=user1_id, @@ -691,7 +707,22 @@ def test_rooms_bump_stamp_backfill(self) -> None: **shared_kwargs, ) ) - self.get_success(persistence.persist_event(*join_tuple)) + # We have to create our own context to get the state set correctly. 
If we use + # the `EventContext` from the `flawed_join_tuple`, the `current_state_events` + # table will only have the join event in it which should never happen in our + # real server. + join_event = flawed_join_tuple[0] + join_context = self.get_success( + self.state_handler.compute_event_context( + join_event, + state_ids_before_event={ + (e.type, e.state_key): e.event_id + for e in [create_tuple[0], invite_tuple[0]] + }, + partial_state=False, + ) + ) + self.get_success(persistence.persist_event(join_event, join_context)) # Doing an SS request should return a positive `bump_stamp`, even though # the only event that matches the bump types has as negative stream From 32ae1622786e1283b91ded3a2cc69c20aaf6b52d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 14:35:24 -0500 Subject: [PATCH 034/142] Fix rejecting invite when no_longer_in_room (and other non-join transitions) --- synapse/storage/databases/main/events.py | 139 +++++++++++++---------- tests/storage/test_events.py | 76 +++++++++++++ 2 files changed, 155 insertions(+), 60 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 222001807bf..7a2bcba3880 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1199,7 +1199,7 @@ def _update_current_state_txn( # We handle `sliding_sync_membership_snapshots` before `current_state_events` so # we can gather the current state before it might be deleted if we are - # `no_longer_in_room`. + # last ones in the room and now we are `no_longer_in_room`. # # We do this regardless of whether the server is `no_longer_in_room` or not # because we still want a row if a local user was just left/kicked or got banned @@ -1211,11 +1211,6 @@ def _update_current_state_txn( membership_event_id_to_user_id_map[event_id] = state_key[1] if len(membership_event_id_to_user_id_map) > 0: - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} - relevant_state_set = { (EventTypes.Create, ""), (EventTypes.RoomEncryption, ""), @@ -1256,55 +1251,80 @@ def _update_current_state_txn( if state_key in relevant_state_set: current_state_map[state_key] = event_id - # Fetch the raw event JSON from the database - ( - event_id_in_list_clause, - event_id_args, - ) = make_in_list_sql_clause( - self.database_engine, - "event_id", - current_state_map.values(), - ) - txn.execute( - f""" - SELECT event_id, type, state_key, json FROM event_json - INNER JOIN events USING (event_id) - WHERE {event_id_in_list_clause} - """, - event_id_args, - ) + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + if current_state_map: + # We have current state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( + True + ) - # Parse the raw event JSON - for row in txn: - event_id, event_type, state_key, json = row - event_json = db_to_json(json) + # Fetch the raw event JSON from the database + ( + event_id_in_list_clause, + event_id_args, + ) = make_in_list_sql_clause( + self.database_engine, + "event_id", + current_state_map.values(), + ) + txn.execute( + f""" + SELECT event_id, type, state_key, json FROM event_json + INNER JOIN events USING (event_id) + WHERE {event_id_in_list_clause} + """, + event_id_args, + ) - if event_type == 
EventTypes.Create: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_membership_snapshots_insert_map["room_type"] = ( - room_type - ) - elif event_type == EventTypes.RoomEncryption: - encryption_algorithm = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_membership_snapshots_insert_map["is_encrypted"] = ( - is_encrypted - ) - elif event_type == EventTypes.Name: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_membership_snapshots_insert_map["room_name"] = ( - room_name - ) - else: - raise AssertionError( - f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" - ) + # Parse the raw event JSON + for row in txn: + event_id, event_type, state_key, json = row + event_json = db_to_json(json) + + if event_type == EventTypes.Create: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_membership_snapshots_insert_map[ + "room_type" + ] = room_type + elif event_type == EventTypes.RoomEncryption: + encryption_algorithm = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is_encrypted = encryption_algorithm is not None + sliding_sync_membership_snapshots_insert_map[ + "is_encrypted" + ] = is_encrypted + elif event_type == EventTypes.Name: + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) + sliding_sync_membership_snapshots_insert_map[ + "room_name" + ] = room_name + else: + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + ) + else: + # We don't have any `current_state_events` anymore (previously + # cleared out because of `no_longer_in_room`). This can happen if + # one user is joined and another is invited (some non-join + # membership). If the joined user leaves, we are `no_longer_in_room` + # and `current_state_events` is cleared out. When the invited user + # rejects the invite (leaves the room), we will end up here. + # + # In these cases, we should inherit the meta data from the previous + # snapshot. When using sliding sync filters, this will prevent the + # room from disappearing/appearing just because you left the room. + # + # Ideally, we could additionally assert that we're only here for + # valid non-join membership transitions. + assert delta_state.no_longer_in_room # Update the `sliding_sync_membership_snapshots` table # @@ -1315,20 +1335,20 @@ def _update_current_state_txn( txn.execute_batch( f""" INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering, has_known_state, {", ".join(insert_keys)}) + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {"," + (", ".join(insert_keys)) if insert_keys else ""}) VALUES ( ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?), - ?, - {", ".join("?" for _ in insert_values)} + (SELECT stream_ordering FROM events WHERE event_id = ?) + {"," + (", ".join("?" 
for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) DO UPDATE SET membership_event_id = EXCLUDED.membership_event_id, membership = EXCLUDED.membership, - event_stream_ordering = EXCLUDED.event_stream_ordering, - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + event_stream_ordering = EXCLUDED.event_stream_ordering + {"," + (", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} """, [ [ @@ -1337,7 +1357,6 @@ def _update_current_state_txn( membership_event_id, membership_event_id, membership_event_id, - True, # has_known_state ] + list(insert_values) for membership_event_id, user_id in membership_event_id_to_user_id_map.items() diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index acc84f4231c..99b3a5676e2 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1553,6 +1553,82 @@ def test_non_join_invite_ban(self) -> None: ), ) + def test_non_join_reject_invite_empty_room(self) -> None: + """ + In a room where no one is joined (`no_longer_in_room`), test rejecting an invite. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + # User2 leaves the room + user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_event_pos = self.get_success( + self.store.get_position_for_event(user2_leave_response["event_id"]) + ) + + # User1 rejects the invite + user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_event_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one is joined to the room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user left + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + # Holds the info according to the current state when the left + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + def test_membership_changing(self) -> None: """ Test latest snapshot evolves when membership changes (`sliding_sync_membership_snapshots`). 
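
The dynamically-built batch upsert in the patch above is easier to follow when expanded. The following is a rough sketch only, not part of any patch: the real statement resolves `membership` and `event_stream_ordering` through subqueries against `room_memberships` and `events`, and assembles the column list from whichever state-derived values are actually known (`insert_keys`); the `?` placeholders here stand in for those details.

    -- Sketch: with current state available, each membership event effectively runs
    INSERT INTO sliding_sync_membership_snapshots
        (room_id, user_id, membership_event_id, membership, event_stream_ordering,
         has_known_state, room_type, room_name, is_encrypted)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT (room_id, user_id)
    DO UPDATE SET
        membership_event_id = EXCLUDED.membership_event_id,
        membership = EXCLUDED.membership,
        event_stream_ordering = EXCLUDED.event_stream_ordering,
        has_known_state = EXCLUDED.has_known_state,
        room_type = EXCLUDED.room_type,
        room_name = EXCLUDED.room_name,
        is_encrypted = EXCLUDED.is_encrypted;

    -- Sketch: in the inherit case (no current state left because the server is
    -- `no_longer_in_room`), the state-derived columns are omitted entirely, so the
    -- conflict update only touches the membership fields
    INSERT INTO sliding_sync_membership_snapshots
        (room_id, user_id, membership_event_id, membership, event_stream_ordering)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT (room_id, user_id)
    DO UPDATE SET
        membership_event_id = EXCLUDED.membership_event_id,
        membership = EXCLUDED.membership,
        event_stream_ordering = EXCLUDED.event_stream_ordering;

Leaving the state-derived columns out of the second form is what lets the previous snapshot's `room_type`, `room_name`, and `is_encrypted` survive the `ON CONFLICT` update, so a rejected invite in an otherwise-empty room keeps its metadata instead of being reset.
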
From dc447a673fe26918436212875186035f0faa6d5c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 14:47:17 -0500 Subject: [PATCH 035/142] Clarify when/why we upsert --- synapse/storage/databases/main/events.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 7a2bcba3880..29cc5ec9f60 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1332,6 +1332,9 @@ def _update_current_state_txn( # lists insert_keys = sliding_sync_membership_snapshots_insert_map.keys() insert_values = sliding_sync_membership_snapshots_insert_map.values() + # We need to insert/update regardless of whether we have `insert_keys` + # because there are other fields in the `ON CONFLICT` upsert to run (see + # inherit case above for more context when this happens). txn.execute_batch( f""" INSERT INTO sliding_sync_membership_snapshots @@ -1547,9 +1550,10 @@ def _update_current_state_txn( # choose the best possible answer by using the "first" event ID which we # will assume will have the greatest `stream_ordering`. We really just # need *some* answer in case we are the first ones inserting into the - # table and in reality, `_store_event_txn()` is run before this function - # so it will already have the correct value. This is just to account for - # things changing in the future. + # table and in reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run + # after this function to update it to the correct latest value. This is + # just to account for things changing in the future. next(iter(to_insert.values())), ] # If we have a `bump_event_id`, let's update the `bump_stamp` column @@ -1565,7 +1569,11 @@ def _update_current_state_txn( insert_keys = sliding_sync_joined_rooms_insert_map.keys() insert_values = sliding_sync_joined_rooms_insert_map.values() args.extend(iter(insert_values)) - if len(insert_keys) > 0: + # We only need to update when one of the relevant state values has changed + if insert_keys: + # We don't update `event_stream_ordering` `ON CONFLICT` because it's simpler + # we can just + # # We don't update `bump_stamp` `ON CONFLICT` because we're dealing with # state here and the only state event that is also a bump event type is # `m.room.create`. 
Given the room creation event is the first one in the @@ -1584,12 +1592,7 @@ def _update_current_state_txn( ) ON CONFLICT (room_id) DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)}, - event_stream_ordering = CASE - WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering - THEN EXCLUDED.event_stream_ordering - ELSE event_stream_ordering - END + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} """, args, ) From 96a4614f929711770d8ca1e9c34d3f35bbbb4035 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 14:50:11 -0500 Subject: [PATCH 036/142] Update fixme comment --- tests/rest/client/sliding_sync/test_rooms_meta.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/rest/client/sliding_sync/test_rooms_meta.py b/tests/rest/client/sliding_sync/test_rooms_meta.py index d9050c1fe77..a69987f61d8 100644 --- a/tests/rest/client/sliding_sync/test_rooms_meta.py +++ b/tests/rest/client/sliding_sync/test_rooms_meta.py @@ -688,9 +688,9 @@ def test_rooms_bump_stamp_backfill(self) -> None: # the real `process_remote_join()` as possible but we'd like to avoid some of # the auth checks that would be done in the real code. # - # FIXME: The test was originally written using this less-real shortcut but it - # would be nice to use the real remote join process in a - # `FederatingHomeserverTestCase`. + # FIXME: The test was originally written using this less-real + # `persist_event(...)` shortcut but it would be nice to use the real remote join + # process in a `FederatingHomeserverTestCase`. flawed_join_tuple = self.get_success( create_event( self.hs, From 3566abd9bcef7c8ea848289dc2aa66b3e2f4f61e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 15:14:48 -0500 Subject: [PATCH 037/142] Fix boolean schema for Postgres --- .../schema/main/delta/87/01_sliding_sync_memberships.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index eb31cc7aff2..2ad316e8e41 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( -- `m.room.name` -> `content.name` (current state) room_name TEXT, -- `m.room.encryption` -> `content.algorithm` (current state) - is_encrypted BOOLEAN DEFAULT 0 NOT NULL, + is_encrypted BOOLEAN DEFAULT FALSE NOT NULL, -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms` -- (tracked by https://github.com/element-hq/synapse/issues/17540) PRIMARY KEY (room_id) @@ -68,7 +68,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( -- able to distinguish between a room with `None` as valid value for some state and -- room where the state is completely unknown. Basically, this should be True unless -- no stripped state was provided for a remote invite/knock (False). 
- has_known_state BOOLEAN DEFAULT 0 NOT NULL, + has_known_state BOOLEAN DEFAULT FALSE NOT NULL, -- `m.room.create` -> `content.type` (according to the current state at the time of -- the membership) room_type TEXT, @@ -77,7 +77,7 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( room_name TEXT, -- `m.room.encryption` -> `content.algorithm` (according to the current state at the -- time of the membership) - is_encrypted BOOLEAN DEFAULT 0 NOT NULL, + is_encrypted BOOLEAN DEFAULT FALSE NOT NULL, -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms` -- (tracked by https://github.com/element-hq/synapse/issues/17540) PRIMARY KEY (room_id, user_id) From 5df94f47b5bab55451d9bd85e456f1bac65f633b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 17:15:09 -0500 Subject: [PATCH 038/142] Fix running into `StopIteration` More context about how/why `StopIteration` was being ignored silently which made this problem harder to debug. See https://github.com/element-hq/synapse/pull/17512#discussion_r1715954505 --- synapse/storage/databases/main/events.py | 49 +++++++++++++----------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 29cc5ec9f60..3f9ca263211 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1543,34 +1543,37 @@ def _update_current_state_txn( ) # Update the `sliding_sync_joined_rooms` table - args: List[Any] = [ - room_id, - # Even though `Mapping`/`Dict` have no guaranteed order, some - # implementations may preserve insertion order so we're just going to - # choose the best possible answer by using the "first" event ID which we - # will assume will have the greatest `stream_ordering`. We really just - # need *some* answer in case we are the first ones inserting into the - # table and in reality, - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run - # after this function to update it to the correct latest value. This is - # just to account for things changing in the future. - next(iter(to_insert.values())), - ] - # If we have a `bump_event_id`, let's update the `bump_stamp` column - bump_stamp_column = "" - bump_stamp_values_clause = "" - if bump_event_id is not None: - bump_stamp_column = "bump_stamp, " - bump_stamp_values_clause = ( - "(SELECT stream_ordering FROM events WHERE event_id = ?)," - ) - args.append(bump_event_id) + # # Pulling keys/values separately is safe and will produce congruent lists insert_keys = sliding_sync_joined_rooms_insert_map.keys() insert_values = sliding_sync_joined_rooms_insert_map.values() - args.extend(iter(insert_values)) # We only need to update when one of the relevant state values has changed if insert_keys: + args: List[Any] = [ + room_id, + # Even though `Mapping`/`Dict` have no guaranteed order, some + # implementations may preserve insertion order so we're just going to + # choose the best possible answer by using the "first" event ID which we + # will assume will have the greatest `stream_ordering`. We really just + # need *some* answer in case we are the first ones inserting into the + # table and in reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run + # after this function to update it to the correct latest value. This is + # just to account for things changing in the future. 
+ next(iter(to_insert.values())), + ] + # If we have a `bump_event_id`, let's update the `bump_stamp` column + bump_stamp_column = "" + bump_stamp_values_clause = "" + if bump_event_id is not None: + bump_stamp_column = "bump_stamp, " + bump_stamp_values_clause = ( + "(SELECT stream_ordering FROM events WHERE event_id = ?)," + ) + args.append(bump_event_id) + + args.extend(iter(insert_values)) + # We don't update `event_stream_ordering` `ON CONFLICT` because it's simpler # we can just # From 8b0e1692f99017909d6945ba2853743cecc56179 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 18:51:11 -0500 Subject: [PATCH 039/142] More realistic remote room forgotten test --- .../client/sliding_sync/test_rooms_meta.py | 13 +- tests/storage/test_roommember.py | 212 ++++++++++++++++-- 2 files changed, 203 insertions(+), 22 deletions(-) diff --git a/tests/rest/client/sliding_sync/test_rooms_meta.py b/tests/rest/client/sliding_sync/test_rooms_meta.py index a69987f61d8..690912133a6 100644 --- a/tests/rest/client/sliding_sync/test_rooms_meta.py +++ b/tests/rest/client/sliding_sync/test_rooms_meta.py @@ -45,6 +45,9 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.storage_controllers = hs.get_storage_controllers() self.state_handler = self.hs.get_state_handler() + persistence = self.hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence def test_rooms_meta_when_joined(self) -> None: """ @@ -601,7 +604,6 @@ def test_rooms_bump_stamp_backfill(self) -> None: Test that `bump_stamp` ignores backfilled events, i.e. events with a negative stream ordering. """ - user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -678,11 +680,10 @@ def test_rooms_bump_stamp_backfill(self) -> None: self.get_success(self.store.store_room(room_id, creator, False, room_version)) # Persist these events as backfilled events. - persistence = self.hs.get_storage_controllers().persistence - assert persistence is not None - for event, context in remote_events_and_contexts: - self.get_success(persistence.persist_event(event, context, backfilled=True)) + self.get_success( + self.persistence.persist_event(event, context, backfilled=True) + ) # Now we join the local user to the room. 
We want to make this feel as close to # the real `process_remote_join()` as possible but we'd like to avoid some of @@ -722,7 +723,7 @@ def test_rooms_bump_stamp_backfill(self) -> None: partial_state=False, ) ) - self.get_success(persistence.persist_event(join_event, join_context)) + self.get_success(self.persistence.persist_event(join_event, join_context)) # Doing an SS request should return a positive `bump_stamp`, even though # the only event that matches the bump types has as negative stream diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 418b5561088..8fc2e49187a 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -24,7 +24,7 @@ from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import EventTypes, JoinRules, Membership, EventContentFields from synapse.api.room_versions import RoomVersions from synapse.rest import admin from synapse.rest.admin import register_servlets_for_client_rest_resource @@ -39,6 +39,7 @@ from tests.server import TestHomeServer from tests.test_utils import event_injection from tests.unittest import skip_unless +from tests.test_utils.event_injection import create_event logger = logging.getLogger(__name__) @@ -54,6 +55,10 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> N # We can't test the RoomMemberStore on its own without the other event # storage logic self.store = hs.get_datastores().main + self.state_handler = self.hs.get_state_handler() + persistence = self.hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence self.u_alice = self.register_user("alice", "pass") self.t_alice = self.login("alice", "pass") @@ -220,31 +225,206 @@ def test_room_is_locally_forgotten(self) -> None: ) def test_join_locally_forgotten_room(self) -> None: - """Tests if a user joins a forgotten room the room is not forgotten anymore.""" - self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) - self.assertFalse( - self.get_success(self.store.is_locally_forgotten_room(self.room)) + """ + Tests if a user joins a forgotten room, the room is not forgotten anymore. + + Since a room can't be re-joined if everyone has left. This can only happen with + a room with remote users in it. 
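+
+        To exercise this, the test hand-rolls a remote room from backfilled events,
+        joins the local user with a manually-built `EventContext`, has them leave and
+        forget the room, and then re-joins to check that the room is no longer
+        forgotten.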
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote room + creator = "@user:other" + room_id = "!foo:other" + room_version = RoomVersions.V10 + shared_kwargs = { + "room_id": room_id, + "room_version": room_version.identifier, + } + + create_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[], + type=EventTypes.Create, + state_key="", + content={ + # The `ROOM_CREATOR` field could be removed if we used a room + # version > 10 (in favor of relying on `sender`) + EventContentFields.ROOM_CREATOR: creator, + EventContentFields.ROOM_VERSION: room_version.identifier, + }, + sender=creator, + **shared_kwargs, + ) + ) + creator_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[create_tuple[0].event_id], + auth_event_ids=[create_tuple[0].event_id], + type=EventTypes.Member, + state_key=creator, + content={"membership": Membership.JOIN}, + sender=creator, + **shared_kwargs, + ) + ) + # We add a message event as a valid "bump type" + msg_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[creator_tuple[0].event_id], + auth_event_ids=[create_tuple[0].event_id], + type=EventTypes.Message, + content={"body": "foo", "msgtype": "m.text"}, + sender=creator, + **shared_kwargs, + ) + ) + invite_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[msg_tuple[0].event_id], + auth_event_ids=[create_tuple[0].event_id, creator_tuple[0].event_id], + type=EventTypes.Member, + state_key=user1_id, + content={"membership": Membership.INVITE}, + sender=creator, + **shared_kwargs, + ) ) - # after leaving and forget the room, it is forgotten - self.get_success( - event_injection.inject_member_event( - self.hs, self.room, self.u_alice, "leave" + remote_events_and_contexts = [ + create_tuple, + creator_tuple, + msg_tuple, + invite_tuple, + ] + + # Ensure the local HS knows the room version + self.get_success(self.store.store_room(room_id, creator, False, room_version)) + + # Persist these events as backfilled events. + for event, context in remote_events_and_contexts: + self.get_success( + self.persistence.persist_event(event, context, backfilled=True) + ) + + # Now we join the local user to the room. We want to make this feel as close to + # the real `process_remote_join()` as possible but we'd like to avoid some of + # the auth checks that would be done in the real code. + # + # FIXME: The test was originally written using this less-real + # `persist_event(...)` shortcut but it would be nice to use the real remote join + # process in a `FederatingHomeserverTestCase`. + flawed_join_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[invite_tuple[0].event_id], + # This doesn't work correctly to create an `EventContext` that includes + # both of these state events. I assume it's because we're working on our + # local homeserver which has the remote state set as `outlier`. We have + # to create our own EventContext below to get this right. + auth_event_ids=[create_tuple[0].event_id, invite_tuple[0].event_id], + type=EventTypes.Member, + state_key=user1_id, + content={"membership": Membership.JOIN}, + sender=user1_id, + **shared_kwargs, ) ) - self.get_success(self.store.forget(self.u_alice, self.room)) - self.assertTrue( - self.get_success(self.store.is_locally_forgotten_room(self.room)) + # We have to create our own context to get the state set correctly. 
If we use + # the `EventContext` from the `flawed_join_tuple`, the `current_state_events` + # table will only have the join event in it which should never happen in our + # real server. + join_event = flawed_join_tuple[0] + join_context = self.get_success( + self.state_handler.compute_event_context( + join_event, + state_ids_before_event={ + (e.type, e.state_key): e.event_id + for e in [create_tuple[0], invite_tuple[0]] + }, + partial_state=False, + ) ) + self.get_success(self.persistence.persist_event(join_event, join_context)) - # after rejoin the room is not forgotten anymore + # The room shouldn't be forgotten because the local user just joined + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(room_id)) + ) + + # After all of the local users (there is only user1) leave and forgetting the + # room, it is forgotten + user1_leave_response = self.helper.leave(room_id, user1_id, tok=user1_tok) + self.get_success(self.store.forget(user1_id, room_id)) + self.assertTrue(self.get_success(self.store.is_locally_forgotten_room(room_id))) + + # Invite local user1 again. This makes it easy to auth the join event. + invite_tuple2 = self.get_success( + create_event( + self.hs, + prev_event_ids=[user1_leave_response["event_id"]], + auth_event_ids=[create_tuple[0].event_id, creator_tuple[0].event_id], + type=EventTypes.Member, + state_key=user1_id, + content={"membership": Membership.INVITE}, + sender=creator, + **shared_kwargs, + ) + ) self.get_success( - event_injection.inject_member_event( - self.hs, self.room, self.u_alice, "join" + self.persistence.persist_event( + invite_tuple2[0], invite_tuple2[1], backfilled=True + ) + ) + + # Join the local user to the room (again). We want to make this feel as close to + # the real `process_remote_join()` as possible but we'd like to avoid some of + # the auth checks that would be done in the real code. + # + # FIXME: The test was originally written using this less-real + # `event_injection.inject_member_event(...)` shortcut but it would be nice to + # use the real remote join process in a `FederatingHomeserverTestCase`. + flawed_join_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[invite_tuple2[0].event_id], + # This doesn't work correctly to create an `EventContext` that includes + # both of these state events. I assume it's because we're working on our + # local homeserver which has the remote state set as `outlier`. We have + # to create our own EventContext below to get this right. + auth_event_ids=[create_tuple[0].event_id, invite_tuple2[0].event_id], + type=EventTypes.Member, + state_key=user1_id, + content={"membership": Membership.JOIN}, + sender=user1_id, + **shared_kwargs, ) ) + # We have to create our own context to get the state set correctly. If we use + # the `EventContext` from the `flawed_join_tuple`, the `current_state_events` + # table will only have the join event in it which should never happen in our + # real server. 
+ join_event = flawed_join_tuple[0] + join_context = self.get_success( + self.state_handler.compute_event_context( + join_event, + state_ids_before_event={ + (e.type, e.state_key): e.event_id + for e in [create_tuple[0], invite_tuple2[0]] + }, + partial_state=False, + ) + ) + self.get_success(self.persistence.persist_event(join_event, join_context)) + + # After the local user rejoins the remote room, it isn't forgotten anymore self.assertFalse( - self.get_success(self.store.is_locally_forgotten_room(self.room)) + self.get_success(self.store.is_locally_forgotten_room(room_id)) ) From f49003c35c2d3df78bde307ffa6052a6fcf612af Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 18:55:59 -0500 Subject: [PATCH 040/142] No invites needed --- tests/storage/test_roommember.py | 64 ++++++-------------------------- 1 file changed, 12 insertions(+), 52 deletions(-) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 8fc2e49187a..d9243ac2899 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -271,36 +271,10 @@ def test_join_locally_forgotten_room(self) -> None: **shared_kwargs, ) ) - # We add a message event as a valid "bump type" - msg_tuple = self.get_success( - create_event( - self.hs, - prev_event_ids=[creator_tuple[0].event_id], - auth_event_ids=[create_tuple[0].event_id], - type=EventTypes.Message, - content={"body": "foo", "msgtype": "m.text"}, - sender=creator, - **shared_kwargs, - ) - ) - invite_tuple = self.get_success( - create_event( - self.hs, - prev_event_ids=[msg_tuple[0].event_id], - auth_event_ids=[create_tuple[0].event_id, creator_tuple[0].event_id], - type=EventTypes.Member, - state_key=user1_id, - content={"membership": Membership.INVITE}, - sender=creator, - **shared_kwargs, - ) - ) remote_events_and_contexts = [ create_tuple, creator_tuple, - msg_tuple, - invite_tuple, ] # Ensure the local HS knows the room version @@ -322,12 +296,12 @@ def test_join_locally_forgotten_room(self) -> None: flawed_join_tuple = self.get_success( create_event( self.hs, - prev_event_ids=[invite_tuple[0].event_id], + prev_event_ids=[creator_tuple[0].event_id], # This doesn't work correctly to create an `EventContext` that includes # both of these state events. I assume it's because we're working on our # local homeserver which has the remote state set as `outlier`. We have # to create our own EventContext below to get this right. - auth_event_ids=[create_tuple[0].event_id, invite_tuple[0].event_id], + auth_event_ids=[create_tuple[0].event_id], type=EventTypes.Member, state_key=user1_id, content={"membership": Membership.JOIN}, @@ -344,8 +318,7 @@ def test_join_locally_forgotten_room(self) -> None: self.state_handler.compute_event_context( join_event, state_ids_before_event={ - (e.type, e.state_key): e.event_id - for e in [create_tuple[0], invite_tuple[0]] + (e.type, e.state_key): e.event_id for e in [create_tuple[0]] }, partial_state=False, ) @@ -360,28 +333,12 @@ def test_join_locally_forgotten_room(self) -> None: # After all of the local users (there is only user1) leave and forgetting the # room, it is forgotten user1_leave_response = self.helper.leave(room_id, user1_id, tok=user1_tok) + user1_leave_event = self.get_success( + self.store.get_event(user1_leave_response["event_id"]) + ) self.get_success(self.store.forget(user1_id, room_id)) self.assertTrue(self.get_success(self.store.is_locally_forgotten_room(room_id))) - # Invite local user1 again. This makes it easy to auth the join event. 
- invite_tuple2 = self.get_success( - create_event( - self.hs, - prev_event_ids=[user1_leave_response["event_id"]], - auth_event_ids=[create_tuple[0].event_id, creator_tuple[0].event_id], - type=EventTypes.Member, - state_key=user1_id, - content={"membership": Membership.INVITE}, - sender=creator, - **shared_kwargs, - ) - ) - self.get_success( - self.persistence.persist_event( - invite_tuple2[0], invite_tuple2[1], backfilled=True - ) - ) - # Join the local user to the room (again). We want to make this feel as close to # the real `process_remote_join()` as possible but we'd like to avoid some of # the auth checks that would be done in the real code. @@ -392,12 +349,15 @@ def test_join_locally_forgotten_room(self) -> None: flawed_join_tuple = self.get_success( create_event( self.hs, - prev_event_ids=[invite_tuple2[0].event_id], + prev_event_ids=[user1_leave_response["event_id"]], # This doesn't work correctly to create an `EventContext` that includes # both of these state events. I assume it's because we're working on our # local homeserver which has the remote state set as `outlier`. We have # to create our own EventContext below to get this right. - auth_event_ids=[create_tuple[0].event_id, invite_tuple2[0].event_id], + auth_event_ids=[ + create_tuple[0].event_id, + user1_leave_response["event_id"], + ], type=EventTypes.Member, state_key=user1_id, content={"membership": Membership.JOIN}, @@ -415,7 +375,7 @@ def test_join_locally_forgotten_room(self) -> None: join_event, state_ids_before_event={ (e.type, e.state_key): e.event_id - for e in [create_tuple[0], invite_tuple2[0]] + for e in [create_tuple[0], user1_leave_event] }, partial_state=False, ) From c8508f113ab0a6a86dbac2714e8a9eecaad0e126 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Aug 2024 11:27:57 -0500 Subject: [PATCH 041/142] Clean up tables when a room is purged/deleted --- synapse/storage/databases/main/purge_events.py | 4 ++++ .../schema/main/delta/87/01_sliding_sync_memberships.sql | 2 ++ 2 files changed, 6 insertions(+) diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 3b81ed943c1..fc4c2865951 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -454,6 +454,10 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: # so must be deleted first. "local_current_membership", "room_memberships", + # Note: the sliding_sync_ tables have foreign keys to the `events` table + # so must be deleted first. 
+ "sliding_sync_joined_rooms", + "sliding_sync_membership_snapshots", "events", "federation_inbound_events_staging", "receipts_graph", diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 2ad316e8e41..c8c671cf6ca 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -83,6 +83,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( PRIMARY KEY (room_id, user_id) ); +-- So we can purge rooms easily +CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_room_id ON sliding_sync_membership_snapshots(room_id); -- So we can fetch all rooms for a given user CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_sync_membership_snapshots(user_id); -- So we can sort by `stream_ordering From 9f551f0e97981f4ff53b4d4c016679c0536e9ee4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Aug 2024 11:32:33 -0500 Subject: [PATCH 042/142] Fix lints --- tests/storage/test_roommember.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index d9243ac2899..330fea0e624 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -24,7 +24,7 @@ from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventTypes, JoinRules, Membership, EventContentFields +from synapse.api.constants import EventContentFields, EventTypes, JoinRules, Membership from synapse.api.room_versions import RoomVersions from synapse.rest import admin from synapse.rest.admin import register_servlets_for_client_rest_resource @@ -38,8 +38,8 @@ from tests import unittest from tests.server import TestHomeServer from tests.test_utils import event_injection -from tests.unittest import skip_unless from tests.test_utils.event_injection import create_event +from tests.unittest import skip_unless logger = logging.getLogger(__name__) From 1c931cb3e708091b198540b34643142d15d14564 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Aug 2024 19:19:15 -0500 Subject: [PATCH 043/142] Add background update for `sliding_sync_joined_rooms` --- synapse/storage/databases/main/events.py | 210 +++++++++------- .../databases/main/events_bg_updates.py | 230 ++++++++++++++++++ synapse/storage/databases/main/stream.py | 2 +- .../delta/87/01_sliding_sync_memberships.sql | 19 +- tests/storage/test_events.py | 104 ++++++++ 5 files changed, 479 insertions(+), 86 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 3f9ca263211..62a203e2528 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -93,6 +93,17 @@ ["type", "origin_type", "origin_entity"], ) +# State event type/key pairs that we need to gather to fill in the +# `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
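+#
+# (Membership events are not part of this set; membership changes are tracked
+# separately when updating `sliding_sync_membership_snapshots`.)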
+SLIDING_SYNC_RELEVANT_STATE_SET = { + # So we can fill in the `room_type` column in the `sliding_sync_joined_rooms` table + (EventTypes.Create, ""), + # So we can fill in the `is_encrypted` column in the `sliding_sync_joined_rooms` table + (EventTypes.RoomEncryption, ""), + # So we can fill in the `room_name` column in the `sliding_sync_joined_rooms` table + (EventTypes.Name, ""), +} + @attr.s(slots=True, auto_attribs=True) class DeltaState: @@ -1211,35 +1222,11 @@ def _update_current_state_txn( membership_event_id_to_user_id_map[event_id] = state_key[1] if len(membership_event_id_to_user_id_map) > 0: - relevant_state_set = { - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - (EventTypes.Name, ""), - } - - # Fetch the current state event IDs from the database - ( - event_type_and_state_key_in_list_clause, - event_type_and_state_key_args, - ) = make_tuple_in_list_sql_clause( - self.database_engine, - ("type", "state_key"), - relevant_state_set, - ) - txn.execute( - f""" - SELECT c.event_id, c.type, c.state_key - FROM current_state_events AS c - WHERE - c.room_id = ? - AND {event_type_and_state_key_in_list_clause} - """, - [room_id] + event_type_and_state_key_args, + current_state_map = ( + self._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) ) - current_state_map: MutableStateMap[str] = { - (event_type, state_key): event_id - for event_id, event_type, state_key in txn - } # Since we fetched the current state before we took `to_insert`/`to_delete` # into account, we need to do a couple fixups. # @@ -1248,7 +1235,7 @@ def _update_current_state_txn( current_state_map.pop(state_key, None) # Update the current_state_map with what we have `to_insert` for state_key, event_id in to_insert.items(): - if state_key in relevant_state_set: + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: current_state_map[state_key] = event_id # Map of values to insert/update in the `sliding_sync_membership_snapshots` table @@ -1256,60 +1243,13 @@ def _update_current_state_txn( str, Optional[Union[str, bool]] ] = {} if current_state_map: + sliding_sync_membership_snapshots_insert_map = self._get_sliding_sync_insert_values_according_to_current_state_map_txn( + txn, current_state_map + ) # We have current state to work from sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( True ) - - # Fetch the raw event JSON from the database - ( - event_id_in_list_clause, - event_id_args, - ) = make_in_list_sql_clause( - self.database_engine, - "event_id", - current_state_map.values(), - ) - txn.execute( - f""" - SELECT event_id, type, state_key, json FROM event_json - INNER JOIN events USING (event_id) - WHERE {event_id_in_list_clause} - """, - event_id_args, - ) - - # Parse the raw event JSON - for row in txn: - event_id, event_type, state_key, json = row - event_json = db_to_json(json) - - if event_type == EventTypes.Create: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_membership_snapshots_insert_map[ - "room_type" - ] = room_type - elif event_type == EventTypes.RoomEncryption: - encryption_algorithm = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_membership_snapshots_insert_map[ - "is_encrypted" - ] = is_encrypted - elif event_type == EventTypes.Name: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_membership_snapshots_insert_map[ - "room_name" - ] = room_name - else: - 
raise AssertionError( - f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" - ) else: # We don't have any `current_state_events` anymore (previously # cleared out because of `no_longer_in_room`). This can happen if @@ -1468,7 +1408,12 @@ def _update_current_state_txn( ], ) - # Handle updating the `sliding_sync_joined_rooms` table + # Handle updating the `sliding_sync_joined_rooms` table. We only deal with + # updating the state related columns. The + # `event_stream_ordering`/`bump_stamp` are updated elsewhere in the event + # persisting stack (see + # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) + # event_ids_to_fetch: List[str] = [] create_event_id = None room_encryption_event_id = None @@ -1574,8 +1519,10 @@ def _update_current_state_txn( args.extend(iter(insert_values)) - # We don't update `event_stream_ordering` `ON CONFLICT` because it's simpler - # we can just + # We don't update `event_stream_ordering` `ON CONFLICT` because it's + # simpler and we can just rely on + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to do + # the right thing. # # We don't update `bump_stamp` `ON CONFLICT` because we're dealing with # state here and the only state event that is also a bump event type is @@ -1653,6 +1600,105 @@ def _update_current_state_txn( txn, {m for m in members_to_cache_bust if not self.hs.is_mine_id(m)} ) + @classmethod + def _get_relevant_sliding_sync_current_state_event_ids_txn( + cls, txn: LoggingTransaction, room_id: str + ) -> MutableStateMap[str]: + """ + Fetch the current state event IDs for the relevant (to the + `sliding_sync_joined_rooms` table) state types for the given room. + + TODO + """ + # Fetch the current state event IDs from the database + ( + event_type_and_state_key_in_list_clause, + event_type_and_state_key_args, + ) = make_tuple_in_list_sql_clause( + txn.database_engine, + ("type", "state_key"), + SLIDING_SYNC_RELEVANT_STATE_SET, + ) + txn.execute( + f""" + SELECT c.event_id, c.type, c.state_key + FROM current_state_events AS c + WHERE + c.room_id = ? + AND {event_type_and_state_key_in_list_clause} + """, + [room_id] + event_type_and_state_key_args, + ) + current_state_map: MutableStateMap[str] = { + (event_type, state_key): event_id for event_id, event_type, state_key in txn + } + + return current_state_map + + @classmethod + def _get_sliding_sync_insert_values_according_to_current_state_map_txn( + cls, txn: LoggingTransaction, current_state_map: StateMap[str] + ) -> Dict[str, Optional[Union[str, bool]]]: + """ + TODO + + Returns: + Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant + state values needed to insert into + the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
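+            Only state that is present in `current_state_map` produces an entry; any
+            missing state is simply omitted from the returned map.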
+ """ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + # Fetch the raw event JSON from the database + ( + event_id_in_list_clause, + event_id_args, + ) = make_in_list_sql_clause( + txn.database_engine, + "event_id", + current_state_map.values(), + ) + txn.execute( + f""" + SELECT type, state_key, json FROM event_json + INNER JOIN events USING (event_id) + WHERE {event_id_in_list_clause} + """, + event_id_args, + ) + + # Parse the raw event JSON + for row in txn: + event_type, state_key, json = row + event_json = db_to_json(json) + + if event_type == EventTypes.Create: + room_type = event_json.get("content", {}).get( + EventContentFields.ROOM_TYPE + ) + sliding_sync_insert_map["room_type"] = room_type + elif event_type == EventTypes.RoomEncryption: + encryption_algorithm = event_json.get("content", {}).get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is_encrypted = encryption_algorithm is not None + sliding_sync_insert_map["is_encrypted"] = is_encrypted + elif event_type == EventTypes.Name: + room_name = event_json.get("content", {}).get( + EventContentFields.ROOM_NAME + ) + sliding_sync_insert_map["room_name"] = room_name + else: + # We only expect to see events according to the + # `SLIDING_SYNC_RELEVANT_STATE_SET` which is what will + # `_get_relevant_sliding_sync_current_state_event_ids_txn()` will + # return. + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" + ) + + return sliding_sync_insert_map + def _update_sliding_sync_tables_with_new_persisted_events_txn( self, txn: LoggingTransaction, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 64d303e3307..b59bc2a5618 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -35,8 +35,10 @@ make_tuple_comparison_clause, ) from synapse.storage.databases.main.events import PersistEventsStore +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor from synapse.types import JsonDict, StrCollection +from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES if TYPE_CHECKING: from synapse.server import HomeServer @@ -78,6 +80,11 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" + SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" + SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( + "sliding_sync_membership_snapshots_backfill" + ) + @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -279,6 +286,16 @@ def __init__( where_clause="NOT outlier", ) + # Backfill the sliding sync tables + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + self._sliding_sync_joined_rooms_backfill, + ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + self._sliding_sync_membership_snapshots_backfill, + ) + async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -1516,3 +1533,216 @@ def _populate_txn(txn: LoggingTransaction) -> bool: ) return batch_size + + async def _sliding_sync_joined_rooms_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_joined_rooms` table. 
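+
+        Works in batches: pulls the next `batch_size` room IDs (ordered by room ID)
+        from `current_state_events`, recomputes the relevant state values for each of
+        those rooms, upserts one row per room and records the last processed room ID
+        as the progress marker.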
+ """ + last_room_id = progress.get("last_room_id", "") + + def make_sql_clause_for_get_last_event_pos_in_room( + database_engine: BaseDatabaseEngine, + event_types: Optional[StrCollection] = None, + ) -> Tuple[str, list]: + """ + Returns the ID and event position of the last event in a room at or before a + stream ordering. + + Based on `get_last_event_pos_in_room_before_stream_ordering(...)` + + Args: + database_engine + event_types: Optional allowlist of event types to filter by + + Returns: + A tuple of SQL query and the args + """ + event_type_clause = "" + event_type_args: List[str] = [] + if event_types is not None and len(event_types) > 0: + event_type_clause, event_type_args = make_in_list_sql_clause( + database_engine, "type", event_types + ) + event_type_clause = f"AND {event_type_clause}" + + sql = f""" + SELECT stream_ordering + FROM events + LEFT JOIN rejections USING (event_id) + WHERE room_id = ? + {event_type_clause} + AND NOT outlier + AND rejections.event_id IS NULL + ORDER BY stream_ordering DESC + LIMIT 1 + """ + + return sql, event_type_args + + def _txn(txn: LoggingTransaction) -> int: + # Fetch the set of room IDs that we want to update + txn.execute( + """ + SELECT DISTINCT room_id FROM current_state_events + WHERE room_id > ? + ORDER BY room_id ASC + LIMIT ? + """, + (last_room_id, batch_size), + ) + + rooms_to_update_rows = txn.fetchall() + if not rooms_to_update_rows: + return 0 + + for (room_id,) in rooms_to_update_rows: + logger.info("asdf Working on room %s", room_id) + current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) + # We're iterating over rooms pulled from the current_state_events table + # so we should have some current state for each room + assert current_state_map + + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_according_to_current_state_map_txn( + txn, current_state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_joined_rooms_insert_map + + ( + most_recent_event_stream_ordering_clause, + most_recent_event_stream_ordering_args, + ) = make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, event_types=None + ) + bump_stamp_clause, bump_stamp_args = ( + make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, + event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + ) + ) + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_joined_rooms_insert_map.keys() + insert_values = sliding_sync_joined_rooms_insert_map.values() + + sql = f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) + VALUES ( + ?, + ({most_recent_event_stream_ordering_clause}), + ({bump_stamp_clause}), + {", ".join("?" 
for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO UPDATE SET + event_stream_ordering = EXCLUDED.event_stream_ordering, + bump_stamp = EXCLUDED.bump_stamp, + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """ + args = ( + [room_id, room_id] + + most_recent_event_stream_ordering_args + + [room_id] + + bump_stamp_args + + list(insert_values) + ) + txn.execute(sql, args) + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + {"last_room_id": rooms_to_update_rows[-1][0]}, + ) + + return len(rooms_to_update_rows) + + count = await self.db_pool.runInteraction( + "sliding_sync_joined_rooms_backfill", _txn + ) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL + ) + + return count + + async def _sliding_sync_membership_snapshots_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_membership_snapshots` table. + """ + # last_event_stream_ordering = progress.get("last_event_stream_ordering", "") + + def _txn(txn: LoggingTransaction) -> int: + # # Fetch the set of event IDs that we want to update + # txn.execute( + # """ + # SELECT room_id, user_id, event_id FROM local_current_membership + # WHERE event_stream_ordering > ? + # ORDER BY event_stream_ordering ASC + # LIMIT ? + # """, + # (last_event_stream_ordering, batch_size), + # ) + + # rows = txn.fetchall() + # if not rows: + # return 0 + + # # Update the redactions with the received_ts. + # # + # # Note: Not all events have an associated received_ts, so we + # # fallback to using origin_server_ts. If we for some reason don't + # # have an origin_server_ts, lets just use the current timestamp. + # # + # # We don't want to leave it null, as then we'll never try and + # # censor those redactions. + # txn.execute_batch( + # f""" + # INSERT INTO sliding_sync_membership_snapshots + # (room_id, user_id, membership_event_id, membership, event_stream_ordering + # {"," + (", ".join(insert_keys)) if insert_keys else ""}) + # VALUES ( + # ?, ?, ?, + # (SELECT membership FROM room_memberships WHERE event_id = ?), + # (SELECT stream_ordering FROM events WHERE event_id = ?) + # {"," + (", ".join("?" 
for _ in insert_values)) if insert_values else ""} + # ) + # ON CONFLICT (room_id, user_id) + # DO UPDATE SET + # membership_event_id = EXCLUDED.membership_event_id, + # membership = EXCLUDED.membership, + # event_stream_ordering = EXCLUDED.event_stream_ordering + # {"," + (", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} + # """, + # (TODO,), + # ) + + # self.db_pool.updates._background_update_progress_txn( + # txn, "redactions_received_ts", {"last_event_id": upper_event_id} + # ) + + # return len(rows) + + # TODO + # return len(rows) + return 0 + + count = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill", _txn + ) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + ) + + return count diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 4989c960a64..3054174717b 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1268,7 +1268,7 @@ async def get_last_event_pos_in_room_before_stream_ordering( self, room_id: str, end_token: RoomStreamToken, - event_types: Optional[Collection[str]] = None, + event_types: Optional[StrCollection] = None, ) -> Optional[Tuple[str, PersistedEventPosition]]: """ Returns the ID and event position of the last event in a room at or before a diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index c8c671cf6ca..16b3f84c3d2 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -22,7 +22,7 @@ -- no longer participating in a room, the row will be deleted. CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( room_id TEXT NOT NULL REFERENCES rooms(room_id), - -- The `stream_ordering` of the latest event in the room + -- The `stream_ordering` of the most-recent/latest event in the room event_stream_ordering BIGINT NOT NULL REFERENCES events(stream_ordering), -- The `stream_ordering` of the last event according to the `bump_event_types` bump_stamp BIGINT, @@ -83,9 +83,22 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( PRIMARY KEY (room_id, user_id) ); --- So we can purge rooms easily -CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_room_id ON sliding_sync_membership_snapshots(room_id); +-- So we can purge rooms easily. +-- +-- Since we're using a multi-column index as the primary key (room_id, user_id), the +-- first index column (room_id) is always usable for searching so we don't need to +-- create a separate index for it. 
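+--
+-- e.g. a lookup like `SELECT * FROM sliding_sync_membership_snapshots WHERE room_id = ?`
+-- can be served by the (room_id, user_id) primary key index directly.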
+-- +-- CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_room_id ON sliding_sync_membership_snapshots(room_id); + -- So we can fetch all rooms for a given user CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_sync_membership_snapshots(user_id); -- So we can sort by `stream_ordering CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream_ordering ON sliding_sync_membership_snapshots(event_stream_ordering); + + +-- Add some background updates to populate the new tables +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (8701, 'sliding_sync_joined_rooms_backfill', '{}'); +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (8701, 'sliding_sync_membership_snapshots_backfill', '{}'); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 99b3a5676e2..70ba415f7f0 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -32,6 +32,7 @@ from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict from synapse.events.snapshot import EventContext from synapse.federation.federation_base import event_from_pdu_json +from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -2378,3 +2379,106 @@ def test_non_join_retracted_remote_invite(self) -> None: ) # TODO: test_non_join_state_reset + + def test_joined_background_update_missing(self) -> None: + """ + Test that the background update for `sliding_sync_joined_rooms` backfills missing rows + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok) + + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_joined_rooms` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_joined_rooms", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="RelationsTestCase.test_background_update", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
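+        # (Re-registering the update and clearing `_all_done` forces the updater to run
+        # it again and repopulate the rows deleted above.)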
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + # Ugh, have to reset this flag + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id_no_info, room_id_with_info, space_room_id}, + exact=True, + ) + # self.assertEqual( + # sliding_sync_joined_rooms_results[room_id1], + # _SlidingSyncJoinedRoomResult( + # room_id=room_id1, + # # Latest event in the room + # event_stream_ordering=room_name_update_event_pos.stream, + # bump_stamp=state_map[ + # (EventTypes.Create, "") + # ].internal_metadata.stream_ordering, + # room_type=None, + # room_name="my super duper room", + # is_encrypted=False, + # ), + # ) + + def test_joined_background_update_partial(self) -> None: + """ + Test that the background update for `sliding_sync_joined_rooms` backfills partially updated rows + """ From 23e0d34a2dec7a4828d0b3e3713a15dd5650a92c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Aug 2024 19:30:22 -0500 Subject: [PATCH 044/142] Add more tests --- tests/storage/test_events.py | 172 +++++++++++++++++++++++++++++++---- 1 file changed, 155 insertions(+), 17 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 70ba415f7f0..0c788c2ee52 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2430,7 +2430,7 @@ def test_joined_background_update_missing(self) -> None: column="room_id", iterable=(room_id_no_info, room_id_with_info, space_room_id), keyvalues={}, - desc="RelationsTestCase.test_background_update", + desc="sliding_sync_joined_rooms.test_joined_background_update_missing", ) ) @@ -2453,7 +2453,6 @@ def test_joined_background_update_missing(self) -> None: }, ) ) - # Ugh, have to reset this flag self.store.db_pool.updates._all_done = False self.wait_for_background_updates() @@ -2463,22 +2462,161 @@ def test_joined_background_update_missing(self) -> None: {room_id_no_info, room_id_with_info, space_room_id}, exact=True, ) - # self.assertEqual( - # sliding_sync_joined_rooms_results[room_id1], - # _SlidingSyncJoinedRoomResult( - # room_id=room_id1, - # # Latest event in the room - # event_stream_ordering=room_name_update_event_pos.stream, - # bump_stamp=state_map[ - # (EventTypes.Create, "") - # ].internal_metadata.stream_ordering, - # room_type=None, - # room_name="my super duper room", - # is_encrypted=False, - # ), - # ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_no_info) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_no_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_no_info, + # History visibility just happens to be the last event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomHistoryVisibility, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_with_info) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_with_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_with_info, + # Lastest event sent in the room + event_stream_ordering=state_map[ + 
(EventTypes.RoomEncryption, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + # Lastest event sent in the room + event_stream_ordering=state_map[ + (EventTypes.Name, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) def test_joined_background_update_partial(self) -> None: """ - Test that the background update for `sliding_sync_joined_rooms` backfills partially updated rows + Test that the background update for `sliding_sync_joined_rooms` backfills + partially updated rows. """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_with_info) + ) + + # Clean-up the `sliding_sync_joined_rooms` table as if the the encryption event + # never made it into the table. + self.get_success( + self.store.db_pool.simple_update( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id_with_info}, + updatevalues={"is_encrypted": False}, + desc="sliding_sync_joined_rooms.test_joined_background_update_partial", + ) + ) + + # We should see the partial row that we made in preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id_with_info}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_with_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_with_info, + # Lastest event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomEncryption, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + # Insert and run the background update. 
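+        # (The update should overwrite the stale `is_encrypted` value with the correct
+        # value derived from the room's current state.)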
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id_with_info}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_with_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_with_info, + # Lastest event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomEncryption, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) From d113e743ae0ff604eeadefd81965e058c3898dc0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Aug 2024 19:30:52 -0500 Subject: [PATCH 045/142] Fix lints --- tests/storage/test_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 0c788c2ee52..8875a9364ed 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -32,10 +32,10 @@ from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict from synapse.events.snapshot import EventContext from synapse.federation.federation_base import event_from_pdu_json -from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.types import StateMap from synapse.util import Clock From 4b42e44ef9cb3318c574c716464ef5f5c23edfae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 00:21:35 -0500 Subject: [PATCH 046/142] Work on background update for `sliding_sync_membership_snapshots` --- synapse/storage/databases/main/events.py | 195 +++++++------- .../databases/main/events_bg_updates.py | 245 +++++++++++++----- 2 files changed, 293 insertions(+), 147 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 62a203e2528..138afd324f2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1243,8 +1243,10 @@ def _update_current_state_txn( str, Optional[Union[str, bool]] ] = {} if current_state_map: - sliding_sync_membership_snapshots_insert_map = self._get_sliding_sync_insert_values_according_to_current_state_map_txn( - txn, current_state_map + sliding_sync_membership_snapshots_insert_map = ( + self._get_sliding_sync_insert_values_from_current_state_map_txn( + txn, current_state_map + ) ) # We have current state to work from sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( @@ -1279,19 +1281,19 @@ def _update_current_state_txn( f""" INSERT INTO sliding_sync_membership_snapshots (room_id, user_id, membership_event_id, membership, event_stream_ordering - {"," + (", ".join(insert_keys)) if insert_keys else ""}) + {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?), (SELECT stream_ordering FROM events WHERE event_id = ?) - {"," + (", ".join("?" 
for _ in insert_values)) if insert_values else ""} + {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) DO UPDATE SET membership_event_id = EXCLUDED.membership_event_id, membership = EXCLUDED.membership, event_stream_ordering = EXCLUDED.event_stream_ordering - {"," + (", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} + {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} """, [ [ @@ -1636,7 +1638,7 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( return current_state_map @classmethod - def _get_sliding_sync_insert_values_according_to_current_state_map_txn( + def _get_sliding_sync_insert_values_from_current_state_map_txn( cls, txn: LoggingTransaction, current_state_map: StateMap[str] ) -> Dict[str, Optional[Union[str, bool]]]: """ @@ -2448,91 +2450,27 @@ def _store_room_members_txn( "membership": event.membership, "event_stream_ordering": event.internal_metadata.stream_ordering, } - if raw_stripped_state_events is not None: - stripped_state_map: MutableStateMap[StrippedStateEvent] = {} - if isinstance(raw_stripped_state_events, list): - for raw_stripped_event in raw_stripped_state_events: - stripped_state_event = parse_stripped_state_event( - raw_stripped_event - ) - if stripped_state_event is not None: - stripped_state_map[ - ( - stripped_state_event.type, - stripped_state_event.state_key, - ) - ] = stripped_state_event - - # If there is some stripped state, we assume the remote server passed *all* - # of the potential stripped state events for the room. - create_stripped_event = stripped_state_map.get( - (EventTypes.Create, "") - ) - # Sanity check that we at-least have the create event - if create_stripped_event is not None: - insert_values["has_known_state"] = True - - # Find the room_type - insert_values["room_type"] = ( - create_stripped_event.content.get( - EventContentFields.ROOM_TYPE - ) - if create_stripped_event is not None - else None - ) - - # Find whether the room is_encrypted - encryption_stripped_event = stripped_state_map.get( - (EventTypes.RoomEncryption, "") - ) - encryption = ( - encryption_stripped_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - if encryption_stripped_event is not None - else None - ) - insert_values["is_encrypted"] = encryption is not None - - # Find the room_name - room_name_stripped_event = stripped_state_map.get( - (EventTypes.Name, "") - ) - insert_values["room_name"] = ( - room_name_stripped_event.content.get( - EventContentFields.ROOM_NAME - ) - if room_name_stripped_event is not None - else None + if event.membership == Membership.LEAVE: + # Inherit the meta data from the remote invite/knock. When using + # sliding sync filters, this will prevent the room from + # disappearing/appearing just because you left the room. + pass + elif event.membership in (Membership.INVITE, Membership.KNOCK): + extra_insert_values = ( + self._get_sliding_sync_insert_values_from_stripped_state_txn( + txn, raw_stripped_state_events ) - - else: - # No strip state provided - insert_values["has_known_state"] = False - insert_values["room_type"] = None - insert_values["room_name"] = None - insert_values["is_encrypted"] = False + ) + insert_values.update(extra_insert_values) else: - if event.membership == Membership.LEAVE: - # Inherit the meta data from the remote invite/knock. When using - # sliding sync filters, this will prevent the room from - # disappearing/appearing just because you left the room. 
- pass - elif event.membership in (Membership.INVITE, Membership.KNOCK): - # No strip state provided - insert_values["has_known_state"] = False - insert_values["room_type"] = None - insert_values["room_name"] = None - insert_values["is_encrypted"] = False - else: - # We don't know how to handle this type of membership yet - # - # FIXME: We should use `assert_never` here but for some reason - # the exhaustive matching doesn't recognize the `Never` here. - # assert_never(event.membership) - raise AssertionError( - f"Unexpected out-of-band membership {event.membership} ({event.event_id}) that we don't know how to handle yet" - ) + # We don't know how to handle this type of membership yet + # + # FIXME: We should use `assert_never` here but for some reason + # the exhaustive matching doesn't recognize the `Never` here. + # assert_never(event.membership) + raise AssertionError( + f"Unexpected out-of-band membership {event.membership} ({event.event_id}) that we don't know how to handle yet" + ) self.db_pool.simple_upsert_txn( txn, @@ -2544,6 +2482,85 @@ def _store_room_members_txn( values=insert_values, ) + @classmethod + def _get_sliding_sync_insert_values_from_stripped_state_txn( + cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any + ) -> Dict[str, Optional[Union[str, bool]]]: + """ + TODO + + Returns: + Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant + state values needed to insert into the `sliding_sync_membership_snapshots` tables. + """ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + + if unsigned_stripped_state_events is not None: + stripped_state_map: MutableStateMap[StrippedStateEvent] = {} + if isinstance(unsigned_stripped_state_events, list): + for raw_stripped_event in unsigned_stripped_state_events: + stripped_state_event = parse_stripped_state_event( + raw_stripped_event + ) + if stripped_state_event is not None: + stripped_state_map[ + ( + stripped_state_event.type, + stripped_state_event.state_key, + ) + ] = stripped_state_event + + # If there is some stripped state, we assume the remote server passed *all* + # of the potential stripped state events for the room. 
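+            # (For out-of-band invites/knocks, the stripped state typically comes from
+            # `invite_room_state`/`knock_room_state` in the membership event's
+            # `unsigned` data.)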
+ create_stripped_event = stripped_state_map.get((EventTypes.Create, "")) + # Sanity check that we at-least have the create event + if create_stripped_event is not None: + sliding_sync_insert_map["has_known_state"] = True + + # Find the room_type + sliding_sync_insert_map["room_type"] = ( + create_stripped_event.content.get(EventContentFields.ROOM_TYPE) + if create_stripped_event is not None + else None + ) + + # Find whether the room is_encrypted + encryption_stripped_event = stripped_state_map.get( + (EventTypes.RoomEncryption, "") + ) + encryption = ( + encryption_stripped_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + if encryption_stripped_event is not None + else None + ) + sliding_sync_insert_map["is_encrypted"] = encryption is not None + + # Find the room_name + room_name_stripped_event = stripped_state_map.get((EventTypes.Name, "")) + sliding_sync_insert_map["room_name"] = ( + room_name_stripped_event.content.get(EventContentFields.ROOM_NAME) + if room_name_stripped_event is not None + else None + ) + + else: + # No strip state provided + sliding_sync_insert_map["has_known_state"] = False + sliding_sync_insert_map["room_type"] = None + sliding_sync_insert_map["room_name"] = None + sliding_sync_insert_map["is_encrypted"] = False + else: + # No strip state provided + sliding_sync_insert_map["has_known_state"] = False + sliding_sync_insert_map["room_type"] = None + sliding_sync_insert_map["room_name"] = None + sliding_sync_insert_map["is_encrypted"] = False + + return sliding_sync_insert_map + def _handle_event_relations( self, txn: LoggingTransaction, event: EventBase ) -> None: diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index b59bc2a5618..cef354dd5be 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -20,11 +20,12 @@ # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast import attr +from typing_extensions import assert_never -from synapse.api.constants import EventContentFields, RelationTypes +from synapse.api.constants import EventContentFields, Membership, RelationTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -1598,7 +1599,6 @@ def _txn(txn: LoggingTransaction) -> int: return 0 for (room_id,) in rooms_to_update_rows: - logger.info("asdf Working on room %s", room_id) current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( txn, room_id ) @@ -1606,7 +1606,7 @@ def _txn(txn: LoggingTransaction) -> int: # so we should have some current state for each room assert current_state_map - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_according_to_current_state_map_txn( + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( txn, current_state_map ) # We should have some insert values for each room, even if they are `None` @@ -1679,62 +1679,191 @@ async def _sliding_sync_membership_snapshots_backfill( """ Handles backfilling the `sliding_sync_membership_snapshots` table. 
""" - # last_event_stream_ordering = progress.get("last_event_stream_ordering", "") + last_event_stream_ordering = progress.get( + "last_event_stream_ordering", -(1 << 31) + ) def _txn(txn: LoggingTransaction) -> int: - # # Fetch the set of event IDs that we want to update - # txn.execute( - # """ - # SELECT room_id, user_id, event_id FROM local_current_membership - # WHERE event_stream_ordering > ? - # ORDER BY event_stream_ordering ASC - # LIMIT ? - # """, - # (last_event_stream_ordering, batch_size), - # ) - - # rows = txn.fetchall() - # if not rows: - # return 0 - - # # Update the redactions with the received_ts. - # # - # # Note: Not all events have an associated received_ts, so we - # # fallback to using origin_server_ts. If we for some reason don't - # # have an origin_server_ts, lets just use the current timestamp. - # # - # # We don't want to leave it null, as then we'll never try and - # # censor those redactions. - # txn.execute_batch( - # f""" - # INSERT INTO sliding_sync_membership_snapshots - # (room_id, user_id, membership_event_id, membership, event_stream_ordering - # {"," + (", ".join(insert_keys)) if insert_keys else ""}) - # VALUES ( - # ?, ?, ?, - # (SELECT membership FROM room_memberships WHERE event_id = ?), - # (SELECT stream_ordering FROM events WHERE event_id = ?) - # {"," + (", ".join("?" for _ in insert_values)) if insert_values else ""} - # ) - # ON CONFLICT (room_id, user_id) - # DO UPDATE SET - # membership_event_id = EXCLUDED.membership_event_id, - # membership = EXCLUDED.membership, - # event_stream_ordering = EXCLUDED.event_stream_ordering - # {"," + (", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} - # """, - # (TODO,), - # ) - - # self.db_pool.updates._background_update_progress_txn( - # txn, "redactions_received_ts", {"last_event_id": upper_event_id} - # ) - - # return len(rows) - - # TODO - # return len(rows) - return 0 + # Fetch the set of event IDs that we want to update + txn.execute( + """ + SELECT + c.room_id, + c.user_id, + c.event_id, + c.membership, + c.event_stream_ordering, + e.outlier + FROM local_current_membership as c + INNER JOIN events AS e USING (event_id) + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering ASC + LIMIT ? + """, + (last_event_stream_ordering, batch_size), + ) + + memberships_to_update_rows = txn.fetchall() + if not memberships_to_update_rows: + return 0 + + for ( + room_id, + user_id, + membership_event_id, + membership, + _membership_event_stream_ordering, + is_outlier, + ) in memberships_to_update_rows: + # We don't know how to handle `membership` values other than these. The + # code below would need to be updated. 
+ assert membership in ( + Membership.JOIN, + Membership.INVITE, + Membership.KNOCK, + Membership.LEAVE, + Membership.BAN, + ) + + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + if membership == Membership.JOIN: + # If we're still joined, we can pull from current state + current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) + # We're iterating over rooms that we are joined to so they should + # have `current_state_events` and we should have some current state + # for each room + assert current_state_map + + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( + txn, current_state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have current state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( + True + ) + elif membership in (Membership.INVITE, Membership.KNOCK) or ( + membership == Membership.LEAVE and is_outlier + ): + invite_or_knock_event_id = membership_event_id + invite_or_knock_membership = membership + + # If the event is an `out_of_band_membership` (special case of + # `outlier`), we never had historical state so we have to pull from + # the stripped state on the previous invite/knock event. This gives + # us a consistent view of the room state regardless of your + # membership (i.e. the room shouldn't disappear if your using the + # `is_encrypted` filter and you leave). + if membership == Membership.LEAVE and is_outlier: + # Find the previous invite/knock event before the leave event + txn.execute( + """ + SELECT event_id, membership + FROM room_memberships + WHERE + room_id = ? + AND user_id = ? + AND event_stream_ordering < ? + ORDER BY event_stream_ordering DESC + LIMIT 1 + """ + ) + row = txn.fetchone() + # We should see a corresponding previous invite/knock event + assert row is not None + invite_or_knock_event_id, invite_or_knock_membership = row + + # Pull from the stripped state on the invite/knock event + txn.execute( + """ + SELECT json FROM event_json + WHERE event_id = ? 
+ """, + (invite_or_knock_event_id), + ) + row = txn.fetchone() + # We should find a corresponding event + assert row is not None + json = row[0] + event_json = db_to_json(json) + + raw_stripped_state_events = None + if invite_or_knock_membership == Membership.INVITE: + invite_room_state = event_json.get("unsigned").get( + "invite_room_state" + ) + raw_stripped_state_events = invite_room_state + elif invite_or_knock_membership == Membership.KNOCK: + knock_room_state = event_json.get("unsigned").get( + "knock_room_state" + ) + raw_stripped_state_events = knock_room_state + + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn( + txn, raw_stripped_state_events + ) + # We should have some insert values for each room, even if no + # stripped state is on the event because we still want to record + # that we have no known state + assert sliding_sync_membership_snapshots_insert_map + elif membership == Membership.BAN: + # Pull from historical state + # TODO + pass + else: + assert_never(membership) + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_membership_snapshots_insert_map.keys() + insert_values = sliding_sync_membership_snapshots_insert_map.values() + # We don't need to do anything `ON CONFLICT` because we never partially + # insert/update the snapshots + txn.execute( + f""" + INSERT INTO sliding_sync_membership_snapshots + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {("," + ", ".join(insert_keys)) if insert_keys else ""}) + VALUES ( + ?, ?, ?, ?, + (SELECT stream_ordering FROM events WHERE event_id = ?) + {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} + ) + ON CONFLICT (room_id, user_id) + DO NOTHING + """, + [ + room_id, + user_id, + membership_event_id, + membership, + membership_event_id, + ] + + list(insert_values), + ) + + ( + _room_id, + _user_id, + _membership_event_id, + _membership, + membership_event_stream_ordering, + _is_outlier, + ) = memberships_to_update_rows[-1] + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + {"last_event_stream_ordering": membership_event_stream_ordering}, + ) + + return len(memberships_to_update_rows) count = await self.db_pool.runInteraction( "sliding_sync_membership_snapshots_backfill", _txn From cbeff5740251a09090822bde90b62ad626332e73 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 20:31:57 -0500 Subject: [PATCH 047/142] Use helper --- synapse/storage/databases/main/events.py | 61 ++++-------------------- 1 file changed, 8 insertions(+), 53 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 138afd324f2..025576eb6e4 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1416,21 +1416,11 @@ def _update_current_state_txn( # persisting stack (see # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # - event_ids_to_fetch: List[str] = [] - create_event_id = None - room_encryption_event_id = None - room_name_event_id = None bump_event_id = None + current_state_map = {} for state_key, event_id in to_insert.items(): - if state_key[0] == EventTypes.Create and state_key[1] == "": - create_event_id = event_id - event_ids_to_fetch.append(event_id) - elif state_key[0] == EventTypes.RoomEncryption and state_key[1] == "": - room_encryption_event_id = 
event_id - event_ids_to_fetch.append(event_id) - elif state_key[0] == EventTypes.Name and state_key[1] == "": - room_name_event_id = event_id - event_ids_to_fetch.append(event_id) + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: + current_state_map[state_key] = event_id if ( state_key[0] in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES @@ -1439,9 +1429,11 @@ def _update_current_state_txn( bump_event_id = event_id # Map of values to insert/update in the `sliding_sync_joined_rooms` table - sliding_sync_joined_rooms_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} + sliding_sync_joined_rooms_insert_map = ( + self._get_sliding_sync_insert_values_from_current_state_map_txn( + txn, current_state_map + ) + ) # If something is being deleted from the state, we need to clear it out for state_key in to_delete: @@ -1452,43 +1444,6 @@ def _update_current_state_txn( elif state_key == (EventTypes.Name, ""): sliding_sync_joined_rooms_insert_map["room_name"] = None - # Fetch the events from the database - event_json_rows = cast( - List[Tuple[str, str]], - self.db_pool.simple_select_many_txn( - txn, - table="event_json", - column="event_id", - iterable=event_ids_to_fetch, - retcols=["event_id", "json"], - keyvalues={}, - ), - ) - # Parse the raw event JSON - for event_id, json in event_json_rows: - event_json = db_to_json(json) - - if event_id == create_event_id: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_joined_rooms_insert_map["room_type"] = room_type - elif event_id == room_encryption_event_id: - encryption_algorithm = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_joined_rooms_insert_map["is_encrypted"] = is_encrypted - elif event_id == room_name_event_id: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_joined_rooms_insert_map["room_name"] = room_name - else: - raise AssertionError( - f"Unexpected event_id (we should not be fetching extra events): {event_id}" - ) - # Update the `sliding_sync_joined_rooms` table # # Pulling keys/values separately is safe and will produce congruent lists From 6c2fc1d20f830adca8c7ff1d80941283d9054ed1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 20:51:05 -0500 Subject: [PATCH 048/142] Move background updates to `StateBackgroundUpdateStore` So we can access `_get_state_groups_from_groups_txn(...)` --- synapse/storage/databases/main/events.py | 7 +- .../databases/main/events_bg_updates.py | 363 +---------------- synapse/storage/databases/state/bg_updates.py | 382 +++++++++++++++++- tests/storage/test_events.py | 2 +- 4 files changed, 386 insertions(+), 368 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 025576eb6e4..23dd9f2ccee 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1194,7 +1194,7 @@ def _update_current_state_txn( if ev_type == EventTypes.Member } - # We now update `sliding_sync_membership_snapshots`. + # Handle updating the `sliding_sync_membership_snapshots` table # # This would only happen if someone was state reset out of the room if to_delete: @@ -1674,7 +1674,7 @@ def _update_sliding_sync_tables_with_new_persisted_events_txn( events_and_contexts: The events being persisted """ - # Handle updating `sliding_sync_joined_rooms` + # Handle updating the `sliding_sync_joined_rooms` table. 
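        # `event_stream_ordering` tracks the latest event in the room and
        # `bump_stamp` the latest event whose type is in
        # SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES; Sliding Sync is assumed to use
        # these for sorting rooms by recent, relevant activity.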
room_id_to_stream_ordering_map: Dict[str, int] = {} room_id_to_bump_stamp_map: Dict[str, int] = {} for event, _ in events_and_contexts: @@ -2390,7 +2390,8 @@ def _store_room_members_txn( }, ) - # Update the `sliding_sync_membership_snapshots` table + # Handle updating the `sliding_sync_membership_snapshots` table + # (out-of-band membership events only) # raw_stripped_state_events = None if event.membership == Membership.INVITE: diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index cef354dd5be..64d303e3307 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -20,12 +20,11 @@ # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast import attr -from typing_extensions import assert_never -from synapse.api.constants import EventContentFields, Membership, RelationTypes +from synapse.api.constants import EventContentFields, RelationTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -36,10 +35,8 @@ make_tuple_comparison_clause, ) from synapse.storage.databases.main.events import PersistEventsStore -from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor from synapse.types import JsonDict, StrCollection -from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES if TYPE_CHECKING: from synapse.server import HomeServer @@ -81,11 +78,6 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" - SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" - SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( - "sliding_sync_membership_snapshots_backfill" - ) - @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -287,16 +279,6 @@ def __init__( where_clause="NOT outlier", ) - # Backfill the sliding sync tables - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - self._sliding_sync_joined_rooms_backfill, - ) - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - self._sliding_sync_membership_snapshots_backfill, - ) - async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -1534,344 +1516,3 @@ def _populate_txn(txn: LoggingTransaction) -> bool: ) return batch_size - - async def _sliding_sync_joined_rooms_backfill( - self, progress: JsonDict, batch_size: int - ) -> int: - """ - Handles backfilling the `sliding_sync_joined_rooms` table. - """ - last_room_id = progress.get("last_room_id", "") - - def make_sql_clause_for_get_last_event_pos_in_room( - database_engine: BaseDatabaseEngine, - event_types: Optional[StrCollection] = None, - ) -> Tuple[str, list]: - """ - Returns the ID and event position of the last event in a room at or before a - stream ordering. 
- - Based on `get_last_event_pos_in_room_before_stream_ordering(...)` - - Args: - database_engine - event_types: Optional allowlist of event types to filter by - - Returns: - A tuple of SQL query and the args - """ - event_type_clause = "" - event_type_args: List[str] = [] - if event_types is not None and len(event_types) > 0: - event_type_clause, event_type_args = make_in_list_sql_clause( - database_engine, "type", event_types - ) - event_type_clause = f"AND {event_type_clause}" - - sql = f""" - SELECT stream_ordering - FROM events - LEFT JOIN rejections USING (event_id) - WHERE room_id = ? - {event_type_clause} - AND NOT outlier - AND rejections.event_id IS NULL - ORDER BY stream_ordering DESC - LIMIT 1 - """ - - return sql, event_type_args - - def _txn(txn: LoggingTransaction) -> int: - # Fetch the set of room IDs that we want to update - txn.execute( - """ - SELECT DISTINCT room_id FROM current_state_events - WHERE room_id > ? - ORDER BY room_id ASC - LIMIT ? - """, - (last_room_id, batch_size), - ) - - rooms_to_update_rows = txn.fetchall() - if not rooms_to_update_rows: - return 0 - - for (room_id,) in rooms_to_update_rows: - current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id - ) - # We're iterating over rooms pulled from the current_state_events table - # so we should have some current state for each room - assert current_state_map - - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( - txn, current_state_map - ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_joined_rooms_insert_map - - ( - most_recent_event_stream_ordering_clause, - most_recent_event_stream_ordering_args, - ) = make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, event_types=None - ) - bump_stamp_clause, bump_stamp_args = ( - make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, - event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, - ) - ) - - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = sliding_sync_joined_rooms_insert_map.keys() - insert_values = sliding_sync_joined_rooms_insert_map.values() - - sql = f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) - VALUES ( - ?, - ({most_recent_event_stream_ordering_clause}), - ({bump_stamp_clause}), - {", ".join("?" for _ in insert_values)} - ) - ON CONFLICT (room_id) - DO UPDATE SET - event_stream_ordering = EXCLUDED.event_stream_ordering, - bump_stamp = EXCLUDED.bump_stamp, - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} - """ - args = ( - [room_id, room_id] - + most_recent_event_stream_ordering_args - + [room_id] - + bump_stamp_args - + list(insert_values) - ) - txn.execute(sql, args) - - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - {"last_room_id": rooms_to_update_rows[-1][0]}, - ) - - return len(rooms_to_update_rows) - - count = await self.db_pool.runInteraction( - "sliding_sync_joined_rooms_backfill", _txn - ) - - if not count: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL - ) - - return count - - async def _sliding_sync_membership_snapshots_backfill( - self, progress: JsonDict, batch_size: int - ) -> int: - """ - Handles backfilling the `sliding_sync_membership_snapshots` table. 
- """ - last_event_stream_ordering = progress.get( - "last_event_stream_ordering", -(1 << 31) - ) - - def _txn(txn: LoggingTransaction) -> int: - # Fetch the set of event IDs that we want to update - txn.execute( - """ - SELECT - c.room_id, - c.user_id, - c.event_id, - c.membership, - c.event_stream_ordering, - e.outlier - FROM local_current_membership as c - INNER JOIN events AS e USING (event_id) - WHERE event_stream_ordering > ? - ORDER BY event_stream_ordering ASC - LIMIT ? - """, - (last_event_stream_ordering, batch_size), - ) - - memberships_to_update_rows = txn.fetchall() - if not memberships_to_update_rows: - return 0 - - for ( - room_id, - user_id, - membership_event_id, - membership, - _membership_event_stream_ordering, - is_outlier, - ) in memberships_to_update_rows: - # We don't know how to handle `membership` values other than these. The - # code below would need to be updated. - assert membership in ( - Membership.JOIN, - Membership.INVITE, - Membership.KNOCK, - Membership.LEAVE, - Membership.BAN, - ) - - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} - if membership == Membership.JOIN: - # If we're still joined, we can pull from current state - current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id - ) - # We're iterating over rooms that we are joined to so they should - # have `current_state_events` and we should have some current state - # for each room - assert current_state_map - - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( - txn, current_state_map - ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_membership_snapshots_insert_map - - # We have current state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( - True - ) - elif membership in (Membership.INVITE, Membership.KNOCK) or ( - membership == Membership.LEAVE and is_outlier - ): - invite_or_knock_event_id = membership_event_id - invite_or_knock_membership = membership - - # If the event is an `out_of_band_membership` (special case of - # `outlier`), we never had historical state so we have to pull from - # the stripped state on the previous invite/knock event. This gives - # us a consistent view of the room state regardless of your - # membership (i.e. the room shouldn't disappear if your using the - # `is_encrypted` filter and you leave). - if membership == Membership.LEAVE and is_outlier: - # Find the previous invite/knock event before the leave event - txn.execute( - """ - SELECT event_id, membership - FROM room_memberships - WHERE - room_id = ? - AND user_id = ? - AND event_stream_ordering < ? - ORDER BY event_stream_ordering DESC - LIMIT 1 - """ - ) - row = txn.fetchone() - # We should see a corresponding previous invite/knock event - assert row is not None - invite_or_knock_event_id, invite_or_knock_membership = row - - # Pull from the stripped state on the invite/knock event - txn.execute( - """ - SELECT json FROM event_json - WHERE event_id = ? 
- """, - (invite_or_knock_event_id), - ) - row = txn.fetchone() - # We should find a corresponding event - assert row is not None - json = row[0] - event_json = db_to_json(json) - - raw_stripped_state_events = None - if invite_or_knock_membership == Membership.INVITE: - invite_room_state = event_json.get("unsigned").get( - "invite_room_state" - ) - raw_stripped_state_events = invite_room_state - elif invite_or_knock_membership == Membership.KNOCK: - knock_room_state = event_json.get("unsigned").get( - "knock_room_state" - ) - raw_stripped_state_events = knock_room_state - - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn( - txn, raw_stripped_state_events - ) - # We should have some insert values for each room, even if no - # stripped state is on the event because we still want to record - # that we have no known state - assert sliding_sync_membership_snapshots_insert_map - elif membership == Membership.BAN: - # Pull from historical state - # TODO - pass - else: - assert_never(membership) - - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = sliding_sync_membership_snapshots_insert_map.keys() - insert_values = sliding_sync_membership_snapshots_insert_map.values() - # We don't need to do anything `ON CONFLICT` because we never partially - # insert/update the snapshots - txn.execute( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) - VALUES ( - ?, ?, ?, ?, - (SELECT stream_ordering FROM events WHERE event_id = ?) - {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO NOTHING - """, - [ - room_id, - user_id, - membership_event_id, - membership, - membership_event_id, - ] - + list(insert_values), - ) - - ( - _room_id, - _user_id, - _membership_event_id, - _membership, - membership_event_stream_ordering, - _is_outlier, - ) = memberships_to_update_rows[-1] - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - {"last_event_stream_ordering": membership_event_stream_ordering}, - ) - - return len(memberships_to_update_rows) - - count = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill", _txn - ) - - if not count: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL - ) - - return count diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index ea7d8199a7d..39c82807d22 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -22,15 +22,20 @@ import logging from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union +from typing_extensions import assert_never + +from synapse.api.constants import Membership from synapse.logging.opentracing import tag_args, trace -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.engines import PostgresEngine -from synapse.types import MutableStateMap, StateMap +from synapse.storage.databases.main.events import PersistEventsStore +from synapse.storage.engines 
import BaseDatabaseEngine, PostgresEngine +from synapse.types import JsonDict, MutableStateMap, StateMap, StrCollection +from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.types.state import StateFilter from synapse.util.caches import intern_string @@ -43,6 +48,13 @@ MAX_STATE_DELTA_HOPS = 100 +class _BackgroundUpdates: + SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" + SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( + "sliding_sync_membership_snapshots_backfill" + ) + + class StateGroupBackgroundUpdateStore(SQLBaseStore): """Defines functions related to state groups needed to run the state background updates. @@ -349,6 +361,16 @@ def __init__( columns=["event_stream_ordering"], ) + # Backfill the sliding sync tables + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + self._sliding_sync_joined_rooms_backfill, + ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + self._sliding_sync_membership_snapshots_backfill, + ) + async def _background_deduplicate_state( self, progress: dict, batch_size: int ) -> int: @@ -524,3 +546,357 @@ def reindex_txn(conn: LoggingDatabaseConnection) -> None: ) return 1 + + async def _sliding_sync_joined_rooms_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_joined_rooms` table. + """ + last_room_id = progress.get("last_room_id", "") + + def make_sql_clause_for_get_last_event_pos_in_room( + database_engine: BaseDatabaseEngine, + event_types: Optional[StrCollection] = None, + ) -> Tuple[str, list]: + """ + Returns the ID and event position of the last event in a room at or before a + stream ordering. + + Based on `get_last_event_pos_in_room_before_stream_ordering(...)` + + Args: + database_engine + event_types: Optional allowlist of event types to filter by + + Returns: + A tuple of SQL query and the args + """ + event_type_clause = "" + event_type_args: List[str] = [] + if event_types is not None and len(event_types) > 0: + event_type_clause, event_type_args = make_in_list_sql_clause( + database_engine, "type", event_types + ) + event_type_clause = f"AND {event_type_clause}" + + sql = f""" + SELECT stream_ordering + FROM events + LEFT JOIN rejections USING (event_id) + WHERE room_id = ? + {event_type_clause} + AND NOT outlier + AND rejections.event_id IS NULL + ORDER BY stream_ordering DESC + LIMIT 1 + """ + + return sql, event_type_args + + def _txn(txn: LoggingTransaction) -> int: + # Fetch the set of room IDs that we want to update + txn.execute( + """ + SELECT DISTINCT room_id FROM current_state_events + WHERE room_id > ? + ORDER BY room_id ASC + LIMIT ? 
+ """, + (last_room_id, batch_size), + ) + + rooms_to_update_rows = txn.fetchall() + if not rooms_to_update_rows: + return 0 + + for (room_id,) in rooms_to_update_rows: + current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) + # We're iterating over rooms pulled from the current_state_events table + # so we should have some current state for each room + assert current_state_map + + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( + txn, current_state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_joined_rooms_insert_map + + ( + most_recent_event_stream_ordering_clause, + most_recent_event_stream_ordering_args, + ) = make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, event_types=None + ) + bump_stamp_clause, bump_stamp_args = ( + make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, + event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + ) + ) + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_joined_rooms_insert_map.keys() + insert_values = sliding_sync_joined_rooms_insert_map.values() + + sql = f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) + VALUES ( + ?, + ({most_recent_event_stream_ordering_clause}), + ({bump_stamp_clause}), + {", ".join("?" for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO UPDATE SET + event_stream_ordering = EXCLUDED.event_stream_ordering, + bump_stamp = EXCLUDED.bump_stamp, + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """ + args = ( + [room_id, room_id] + + most_recent_event_stream_ordering_args + + [room_id] + + bump_stamp_args + + list(insert_values) + ) + txn.execute(sql, args) + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + {"last_room_id": rooms_to_update_rows[-1][0]}, + ) + + return len(rooms_to_update_rows) + + count = await self.db_pool.runInteraction( + "sliding_sync_joined_rooms_backfill", _txn + ) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL + ) + + return count + + async def _sliding_sync_membership_snapshots_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_membership_snapshots` table. + """ + last_event_stream_ordering = progress.get( + "last_event_stream_ordering", -(1 << 31) + ) + + def _txn(txn: LoggingTransaction) -> int: + # Fetch the set of event IDs that we want to update + txn.execute( + """ + SELECT + c.room_id, + c.user_id, + c.event_id, + c.membership, + c.event_stream_ordering, + e.outlier + FROM local_current_membership as c + INNER JOIN events AS e USING (event_id) + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering ASC + LIMIT ? + """, + (last_event_stream_ordering, batch_size), + ) + + memberships_to_update_rows = txn.fetchall() + if not memberships_to_update_rows: + return 0 + + for ( + room_id, + user_id, + membership_event_id, + membership, + _membership_event_stream_ordering, + is_outlier, + ) in memberships_to_update_rows: + # We don't know how to handle `membership` values other than these. The + # code below would need to be updated. 
+ assert membership in ( + Membership.JOIN, + Membership.INVITE, + Membership.KNOCK, + Membership.LEAVE, + Membership.BAN, + ) + + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: Dict[ + str, Optional[Union[str, bool]] + ] = {} + if membership == Membership.JOIN: + # If we're still joined, we can pull from current state + current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) + # We're iterating over rooms that we are joined to so they should + # have `current_state_events` and we should have some current state + # for each room + assert current_state_map + + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( + txn, current_state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have current state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( + True + ) + elif membership in (Membership.INVITE, Membership.KNOCK) or ( + membership == Membership.LEAVE and is_outlier + ): + invite_or_knock_event_id = membership_event_id + invite_or_knock_membership = membership + + # If the event is an `out_of_band_membership` (special case of + # `outlier`), we never had historical state so we have to pull from + # the stripped state on the previous invite/knock event. This gives + # us a consistent view of the room state regardless of your + # membership (i.e. the room shouldn't disappear if your using the + # `is_encrypted` filter and you leave). + if membership == Membership.LEAVE and is_outlier: + # Find the previous invite/knock event before the leave event + txn.execute( + """ + SELECT event_id, membership + FROM room_memberships + WHERE + room_id = ? + AND user_id = ? + AND event_stream_ordering < ? + ORDER BY event_stream_ordering DESC + LIMIT 1 + """ + ) + row = txn.fetchone() + # We should see a corresponding previous invite/knock event + assert row is not None + invite_or_knock_event_id, invite_or_knock_membership = row + + # Pull from the stripped state on the invite/knock event + txn.execute( + """ + SELECT json FROM event_json + WHERE event_id = ? 
+ """, + (invite_or_knock_event_id), + ) + row = txn.fetchone() + # We should find a corresponding event + assert row is not None + json = row[0] + event_json = db_to_json(json) + + raw_stripped_state_events = None + if invite_or_knock_membership == Membership.INVITE: + invite_room_state = event_json.get("unsigned").get( + "invite_room_state" + ) + raw_stripped_state_events = invite_room_state + elif invite_or_knock_membership == Membership.KNOCK: + knock_room_state = event_json.get("unsigned").get( + "knock_room_state" + ) + raw_stripped_state_events = knock_room_state + + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn( + txn, raw_stripped_state_events + ) + # We should have some insert values for each room, even if no + # stripped state is on the event because we still want to record + # that we have no known state + assert sliding_sync_membership_snapshots_insert_map + elif membership == Membership.BAN: + # Pull from historical state + # state_group = self.db_pool.simple_select_one_onecol_txn( + # table="event_to_state_groups", + # keyvalues={"event_id": membership_event_id}, + # retcol="state_group", + # allow_none=True, + # desc="_get_state_group_for_event", + # ) + # # We should know the state for the event + # assert state_group is not None + + # state_by_group = self._get_state_groups_from_groups_txn( + # txn, [state_group] + # ) + # state_map = state_by_group[state_group] + pass + else: + assert_never(membership) + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_membership_snapshots_insert_map.keys() + insert_values = sliding_sync_membership_snapshots_insert_map.values() + # We don't need to do anything `ON CONFLICT` because we never partially + # insert/update the snapshots + txn.execute( + f""" + INSERT INTO sliding_sync_membership_snapshots + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {("," + ", ".join(insert_keys)) if insert_keys else ""}) + VALUES ( + ?, ?, ?, ?, + (SELECT stream_ordering FROM events WHERE event_id = ?) + {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} + ) + ON CONFLICT (room_id, user_id) + DO NOTHING + """, + [ + room_id, + user_id, + membership_event_id, + membership, + membership_event_id, + ] + + list(insert_values), + ) + + ( + _room_id, + _user_id, + _membership_event_id, + _membership, + membership_event_stream_ordering, + _is_outlier, + ) = memberships_to_update_rows[-1] + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + {"last_event_stream_ordering": membership_event_stream_ordering}, + ) + + return len(memberships_to_update_rows) + + count = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill", _txn + ) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + ) + + return count diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 8875a9364ed..e6a7d4dba1a 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -35,7 +35,7 @@ from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates +from synapse.storage.databases.state.bg_updates import _BackgroundUpdates from synapse.types import StateMap from synapse.util import Clock From 8461faf3845a42c351bc1e76a56f65a271acfcbe Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 21:56:12 -0500 Subject: [PATCH 049/142] Add historical case to background update --- synapse/storage/databases/main/events.py | 10 ++-- synapse/storage/databases/state/bg_updates.py | 53 ++++++++++++------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 23dd9f2ccee..21540d02799 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1244,7 +1244,7 @@ def _update_current_state_txn( ] = {} if current_state_map: sliding_sync_membership_snapshots_insert_map = ( - self._get_sliding_sync_insert_values_from_current_state_map_txn( + self._get_sliding_sync_insert_values_from_state_map_txn( txn, current_state_map ) ) @@ -1430,7 +1430,7 @@ def _update_current_state_txn( # Map of values to insert/update in the `sliding_sync_joined_rooms` table sliding_sync_joined_rooms_insert_map = ( - self._get_sliding_sync_insert_values_from_current_state_map_txn( + self._get_sliding_sync_insert_values_from_state_map_txn( txn, current_state_map ) ) @@ -1593,8 +1593,8 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( return current_state_map @classmethod - def _get_sliding_sync_insert_values_from_current_state_map_txn( - cls, txn: LoggingTransaction, current_state_map: StateMap[str] + def _get_sliding_sync_insert_values_from_state_map_txn( + cls, txn: LoggingTransaction, state_map: StateMap[str] ) -> Dict[str, Optional[Union[str, bool]]]: """ TODO @@ -1613,7 +1613,7 @@ def _get_sliding_sync_insert_values_from_current_state_map_txn( ) = make_in_list_sql_clause( txn.database_engine, "event_id", - current_state_map.values(), + state_map.values(), ) txn.execute( f""" diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 39c82807d22..9b7843aa3fc 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -32,7 +32,10 @@ 
LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events import PersistEventsStore +from synapse.storage.databases.main.events import ( + SLIDING_SYNC_RELEVANT_STATE_SET, + PersistEventsStore, +) from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.types import JsonDict, MutableStateMap, StateMap, StrCollection from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES @@ -618,7 +621,7 @@ def _txn(txn: LoggingTransaction) -> int: # so we should have some current state for each room assert current_state_map - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( txn, current_state_map ) # We should have some insert values for each room, even if they are `None` @@ -751,7 +754,7 @@ def _txn(txn: LoggingTransaction) -> int: # for each room assert current_state_map - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_current_state_map_txn( + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( txn, current_state_map ) # We should have some insert values for each room, even if they are `None` @@ -827,21 +830,35 @@ def _txn(txn: LoggingTransaction) -> int: assert sliding_sync_membership_snapshots_insert_map elif membership == Membership.BAN: # Pull from historical state - # state_group = self.db_pool.simple_select_one_onecol_txn( - # table="event_to_state_groups", - # keyvalues={"event_id": membership_event_id}, - # retcol="state_group", - # allow_none=True, - # desc="_get_state_group_for_event", - # ) - # # We should know the state for the event - # assert state_group is not None - - # state_by_group = self._get_state_groups_from_groups_txn( - # txn, [state_group] - # ) - # state_map = state_by_group[state_group] - pass + state_group = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_to_state_groups", + keyvalues={"event_id": membership_event_id}, + retcol="state_group", + allow_none=True, + ) + # We should know the state for the event + assert state_group is not None + + state_by_group = self._get_state_groups_from_groups_txn( + txn, + groups=[state_group], + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + ) + state_map = state_by_group[state_group] + + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( + txn, state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have historical state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( + True + ) else: assert_never(membership) From fb5af8f5fa0d089d72b8ef12e0ea00ca890b11e5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 22:13:32 -0500 Subject: [PATCH 050/142] Add background update test for `sliding_sync_membership_snapshots` --- synapse/storage/databases/state/bg_updates.py | 2 +- tests/storage/test_events.py | 154 ++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 9b7843aa3fc..8f94a76b28e 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ 
b/synapse/storage/databases/state/bg_updates.py @@ -828,7 +828,7 @@ def _txn(txn: LoggingTransaction) -> int: # stripped state is on the event because we still want to record # that we have no known state assert sliding_sync_membership_snapshots_insert_map - elif membership == Membership.BAN: + elif membership in (Membership.LEAVE, Membership.BAN): # Pull from historical state state_group = self.db_pool.simple_select_one_onecol_txn( txn, diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index e6a7d4dba1a..2d98a08eaac 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2456,6 +2456,7 @@ def test_joined_background_update_missing(self) -> None: self.store.db_pool.updates._all_done = False self.wait_for_background_updates() + # Make sure the table is populated sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), @@ -2598,6 +2599,7 @@ def test_joined_background_update_partial(self) -> None: self.store.db_pool.updates._all_done = False self.wait_for_background_updates() + # Make sure the table is populated sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), @@ -2620,3 +2622,155 @@ def test_joined_background_update_partial(self) -> None: is_encrypted=True, ), ) + + def test_membership_snapshots_background_update_joined(self) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + backfills missing rows for join memberships. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok) + + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_joined", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
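        # Manually inserting a row into the `background_updates` table (as done
        # below) stands in for what a schema delta would normally do on upgrade;
        # `wait_for_background_updates()` then drives the handler to completion.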
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_no_info) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_with_info) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) From ef5f0fca3abdc2cec8254fbfa449d423eb9c50f7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 23:18:50 -0500 Subject: [PATCH 051/142] Add more tests --- synapse/storage/databases/state/bg_updates.py | 11 +- tests/storage/test_events.py | 631 ++++++++++++++++++ 2 files changed, 639 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 8f94a76b28e..244dedae3d2 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -727,7 +727,7 @@ def _txn(txn: LoggingTransaction) -> int: user_id, membership_event_id, membership, - _membership_event_stream_ordering, + membership_event_stream_ordering, is_outlier, ) in memberships_to_update_rows: # We don't know how to handle `membership` values other than these. The @@ -788,7 +788,12 @@ def _txn(txn: LoggingTransaction) -> int: AND event_stream_ordering < ? 
ORDER BY event_stream_ordering DESC LIMIT 1 - """ + """, + ( + room_id, + user_id, + membership_event_stream_ordering, + ), ) row = txn.fetchone() # We should see a corresponding previous invite/knock event @@ -801,7 +806,7 @@ def _txn(txn: LoggingTransaction) -> int: SELECT json FROM event_json WHERE event_id = ? """, - (invite_or_knock_event_id), + (invite_or_knock_event_id,), ) row = txn.fetchone() # We should find a corresponding event diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 2d98a08eaac..ef139ccfc8e 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2774,3 +2774,634 @@ def test_membership_snapshots_background_update_joined(self) -> None: is_encrypted=False, ), ) + + def test_membership_snapshots_background_update_local_invite(self) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + backfills missing rows for invite memberships. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) + + room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # Invite user1 to the rooms + user1_invite_room_id_no_info_response = self.helper.invite( + room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invite_room_id_with_info_response = self.helper.invite( + room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invite_space_room_id_response = self.helper.invite( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + + # Have user2 leave the rooms to make sure that our background update is not just + # reading from `current_state_events`. For invite/knock memberships, we should + # be reading from the stripped state on the invite/knock event itself. + self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) + self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) + self.helper.leave(space_room_id, user2_id, tok=user2_tok) + # Check to make sure we actually don't have any `current_state_events` for the rooms + current_state_check_rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="current_state_events", + column="room_id", + iterable=[room_id_no_info, room_id_with_info, space_room_id], + retcols=("event_id",), + keyvalues={}, + desc="check current_state_events in test", + ) + ) + self.assertEqual(len(current_state_check_rows), 0) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. 
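        # These rows are normally written when the membership events are persisted,
        # so deleting them simulates a server from before the table existed that
        # has to rely on the background update to backfill.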
+ self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + # The leave memberships for user2 + (room_id_no_info, user2_id), + (room_id_with_info, user2_id), + (space_room_id, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + membership_event_id=user1_invite_room_id_no_info_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_room_id_no_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + membership_event_id=user1_invite_room_id_with_info_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_room_id_with_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=user1_invite_space_room_id_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_space_room_id_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) + + def test_membership_snapshots_background_update_remote_invite( + self, + ) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + backfills missing rows for remote invites (out-of-band memberships). 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_unknown_state, room_id_unknown_state_invite_event = ( + self._create_remote_invite_room_for_user(user1_id, None) + ) + + room_id_no_info, room_id_no_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + ], + ) + ) + + room_id_with_info, room_id_with_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper room", + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + space_room_id, space_room_id_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=( + room_id_unknown_state, + room_id_no_info, + room_id_with_info, + space_room_id, + ), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
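The block below repeats a pattern used by every backfill test in this file: insert a row into `background_updates` naming the update, clear the `_all_done` flag, and wait for the updates to run. If the repetition ever becomes a nuisance, a small helper on the test class along these lines could absorb it (the helper name is hypothetical and not part of the patch; it only reuses calls already present in these tests):

    def _run_membership_snapshots_backfill(self) -> None:
        """Insert the membership snapshots backfill update and wait for it to run."""
        self.get_success(
            self.store.db_pool.simple_insert(
                "background_updates",
                {
                    "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL,
                    "progress_json": "{}",
                },
            )
        )
        self.store.db_pool.updates._all_done = False
        self.wait_for_background_updates()

Each test would then call `self._run_membership_snapshots_backfill()` right after its clean-up step.
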
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_unknown_state, user1_id), + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_unknown_state, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_unknown_state, + user_id=user1_id, + membership_event_id=room_id_unknown_state_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_unknown_state_invite_event.internal_metadata.stream_ordering, + has_known_state=False, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + membership_event_id=room_id_no_info_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_no_info_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + membership_event_id=room_id_with_info_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_with_info_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=space_room_id_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=space_room_id_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) + + def test_membership_snapshots_background_update_remote_invite_rejections_and_retractions( + self, + ) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + backfills missing rows for remote invite rejections/retractions (out-of-band memberships). 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_unknown_state, room_id_unknown_state_invite_event = ( + self._create_remote_invite_room_for_user(user1_id, None) + ) + + room_id_no_info, room_id_no_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + ], + ) + ) + + room_id_with_info, room_id_with_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper room", + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + space_room_id, space_room_id_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # Reject the remote invites. + # Also try retracting a remote invite. + room_id_unknown_state_leave_event_response = self.helper.leave( + room_id_unknown_state, user1_id, tok=user1_tok + ) + room_id_no_info_leave_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=room_id_no_info, + ) + room_id_with_info_leave_event_response = self.helper.leave( + room_id_with_info, user1_id, tok=user1_tok + ) + space_room_id_leave_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=space_room_id, + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=( + room_id_unknown_state, + room_id_no_info, + room_id_with_info, + space_room_id, + ), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_unknown_state, user1_id), + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_unknown_state, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_unknown_state, + user_id=user1_id, + membership_event_id=room_id_unknown_state_leave_event_response[ + "event_id" + ], + membership=Membership.LEAVE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + room_id_unknown_state_leave_event_response["event_id"] + ) + ).stream, + has_known_state=False, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + membership_event_id=room_id_no_info_leave_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=room_id_no_info_leave_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + membership_event_id=room_id_with_info_leave_event_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + room_id_with_info_leave_event_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=space_room_id_leave_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=space_room_id_leave_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) From 419be7c6b28476eff009fac157600d031a53bf3d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 23:29:29 -0500 Subject: [PATCH 052/142] Finish off background update tests --- tests/storage/test_events.py | 217 +++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index ef139ccfc8e..37e78f8c15c 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -3405,3 +3405,220 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret is_encrypted=False, ), ) + + @parameterized.expand( + [ + (Membership.LEAVE,), + (Membership.BAN,), + ] + ) + def test_membership_snapshots_background_update_historical_state( + self, test_membership: str + ) -> None: + """ + 
Test that the background update for `sliding_sync_membership_snapshots` + backfills missing rows for leave memberships. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) + + room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # Join the room in preparation for our test_membership + self.helper.join(room_id_no_info, user1_id, tok=user1_tok) + self.helper.join(room_id_with_info, user1_id, tok=user1_tok) + self.helper.join(space_room_id, user1_id, tok=user1_tok) + + if test_membership == Membership.LEAVE: + # Have user1 leave the rooms + user1_membership_room_id_no_info_response = self.helper.leave( + room_id_no_info, user1_id, tok=user1_tok + ) + user1_membership_room_id_with_info_response = self.helper.leave( + room_id_with_info, user1_id, tok=user1_tok + ) + user1_membership_space_room_id_response = self.helper.leave( + space_room_id, user1_id, tok=user1_tok + ) + elif test_membership == Membership.BAN: + # Ban user1 from the rooms + user1_membership_room_id_no_info_response = self.helper.ban( + room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_membership_room_id_with_info_response = self.helper.ban( + room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_membership_space_room_id_response = self.helper.ban( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + else: + raise AssertionError("Unknown test_membership") + + # Have user2 leave the rooms to make sure that our background update is not just + # reading from `current_state_events`. For leave memberships, we should be + # reading from the historical state. + self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) + self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) + self.helper.leave(space_room_id, user2_id, tok=user2_tok) + # Check to make sure we actually don't have any `current_state_events` for the rooms + current_state_check_rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="current_state_events", + column="room_id", + iterable=[room_id_no_info, room_id_with_info, space_room_id], + retcols=("event_id",), + keyvalues={}, + desc="check current_state_events in test", + ) + ) + self.assertEqual(len(current_state_check_rows), 0) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. 
+ self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The memberships for user1 + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + # The leave memberships for user2 + (room_id_no_info, user2_id), + (room_id_with_info, user2_id), + (space_room_id, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + membership_event_id=user1_membership_room_id_no_info_response[ + "event_id" + ], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_room_id_no_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + membership_event_id=user1_membership_room_id_with_info_response[ + "event_id" + ], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_room_id_with_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + membership_event_id=user1_membership_space_room_id_response["event_id"], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_space_room_id_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + ), + ) From fa63c02648a24985ff9fc8fa398e98172b56f1d6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 23:30:16 -0500 Subject: [PATCH 053/142] Fix lints --- tests/storage/test_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 37e78f8c15c..83149a09213 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ 
-2781,7 +2781,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: backfills missing rows for invite memberships. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") user2_id = self.register_user("user2", "pass") user2_tok = self.login(user2_id, "pass") @@ -2969,7 +2969,7 @@ def test_membership_snapshots_background_update_remote_invite( backfills missing rows for remote invites (out-of-band memberships). """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") # Create rooms with various levels of state that should appear in the table # From 2ec93e3f0d742b1d32c17288936d06ed61462df9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 23:39:39 -0500 Subject: [PATCH 054/142] Move function next to other helpers --- synapse/storage/databases/main/events.py | 158 +++++++++++------------ 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 21540d02799..7697dfe4454 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1656,6 +1656,85 @@ def _get_sliding_sync_insert_values_from_state_map_txn( return sliding_sync_insert_map + @classmethod + def _get_sliding_sync_insert_values_from_stripped_state_txn( + cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any + ) -> Dict[str, Optional[Union[str, bool]]]: + """ + TODO + + Returns: + Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant + state values needed to insert into the `sliding_sync_membership_snapshots` tables. + """ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + + if unsigned_stripped_state_events is not None: + stripped_state_map: MutableStateMap[StrippedStateEvent] = {} + if isinstance(unsigned_stripped_state_events, list): + for raw_stripped_event in unsigned_stripped_state_events: + stripped_state_event = parse_stripped_state_event( + raw_stripped_event + ) + if stripped_state_event is not None: + stripped_state_map[ + ( + stripped_state_event.type, + stripped_state_event.state_key, + ) + ] = stripped_state_event + + # If there is some stripped state, we assume the remote server passed *all* + # of the potential stripped state events for the room. 
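A concrete input helps when reading the branch below. The remote-invite tests earlier in this file hand this helper stripped state of the following shape and expect the corresponding snapshot columns; here is a simplified, standalone sketch of the same extraction over plain dicts (illustrative only, no Synapse types, and the function name is made up):

    from typing import Any, Dict, List, Optional, Union

    def extract_from_stripped_state(
        stripped_events: List[Dict[str, Any]],
    ) -> Dict[str, Optional[Union[str, bool]]]:
        state = {
            (e["type"], e["state_key"]): e.get("content", {}) for e in stripped_events
        }
        create_content = state.get(("m.room.create", ""))
        if create_content is None:
            # Same fallback as the else branches in the real helper.
            return {
                "has_known_state": False,
                "room_type": None,
                "room_name": None,
                "is_encrypted": False,
            }
        return {
            "has_known_state": True,
            "room_type": create_content.get("type"),
            "room_name": state.get(("m.room.name", ""), {}).get("name"),
            "is_encrypted": state.get(("m.room.encryption", ""), {}).get("algorithm") is not None,
        }

    # Mirrors the "space room" remote-invite fixture used in the tests above.
    stripped = [
        {
            "type": "m.room.create",
            "state_key": "",
            "content": {"creator": "@inviter:remote_server", "type": "m.space"},
        },
        {
            "type": "m.room.name",
            "state_key": "",
            "content": {"name": "my super duper space"},
        },
    ]
    assert extract_from_stripped_state(stripped) == {
        "has_known_state": True,
        "room_type": "m.space",
        "room_name": "my super duper space",
        "is_encrypted": False,
    }
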
+ create_stripped_event = stripped_state_map.get((EventTypes.Create, "")) + # Sanity check that we at-least have the create event + if create_stripped_event is not None: + sliding_sync_insert_map["has_known_state"] = True + + # Find the room_type + sliding_sync_insert_map["room_type"] = ( + create_stripped_event.content.get(EventContentFields.ROOM_TYPE) + if create_stripped_event is not None + else None + ) + + # Find whether the room is_encrypted + encryption_stripped_event = stripped_state_map.get( + (EventTypes.RoomEncryption, "") + ) + encryption = ( + encryption_stripped_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + if encryption_stripped_event is not None + else None + ) + sliding_sync_insert_map["is_encrypted"] = encryption is not None + + # Find the room_name + room_name_stripped_event = stripped_state_map.get((EventTypes.Name, "")) + sliding_sync_insert_map["room_name"] = ( + room_name_stripped_event.content.get(EventContentFields.ROOM_NAME) + if room_name_stripped_event is not None + else None + ) + + else: + # No strip state provided + sliding_sync_insert_map["has_known_state"] = False + sliding_sync_insert_map["room_type"] = None + sliding_sync_insert_map["room_name"] = None + sliding_sync_insert_map["is_encrypted"] = False + else: + # No strip state provided + sliding_sync_insert_map["has_known_state"] = False + sliding_sync_insert_map["room_type"] = None + sliding_sync_insert_map["room_name"] = None + sliding_sync_insert_map["is_encrypted"] = False + + return sliding_sync_insert_map + def _update_sliding_sync_tables_with_new_persisted_events_txn( self, txn: LoggingTransaction, @@ -2438,85 +2517,6 @@ def _store_room_members_txn( values=insert_values, ) - @classmethod - def _get_sliding_sync_insert_values_from_stripped_state_txn( - cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any - ) -> Dict[str, Optional[Union[str, bool]]]: - """ - TODO - - Returns: - Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant - state values needed to insert into the `sliding_sync_membership_snapshots` tables. - """ - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} - - if unsigned_stripped_state_events is not None: - stripped_state_map: MutableStateMap[StrippedStateEvent] = {} - if isinstance(unsigned_stripped_state_events, list): - for raw_stripped_event in unsigned_stripped_state_events: - stripped_state_event = parse_stripped_state_event( - raw_stripped_event - ) - if stripped_state_event is not None: - stripped_state_map[ - ( - stripped_state_event.type, - stripped_state_event.state_key, - ) - ] = stripped_state_event - - # If there is some stripped state, we assume the remote server passed *all* - # of the potential stripped state events for the room. 
- create_stripped_event = stripped_state_map.get((EventTypes.Create, "")) - # Sanity check that we at-least have the create event - if create_stripped_event is not None: - sliding_sync_insert_map["has_known_state"] = True - - # Find the room_type - sliding_sync_insert_map["room_type"] = ( - create_stripped_event.content.get(EventContentFields.ROOM_TYPE) - if create_stripped_event is not None - else None - ) - - # Find whether the room is_encrypted - encryption_stripped_event = stripped_state_map.get( - (EventTypes.RoomEncryption, "") - ) - encryption = ( - encryption_stripped_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - if encryption_stripped_event is not None - else None - ) - sliding_sync_insert_map["is_encrypted"] = encryption is not None - - # Find the room_name - room_name_stripped_event = stripped_state_map.get((EventTypes.Name, "")) - sliding_sync_insert_map["room_name"] = ( - room_name_stripped_event.content.get(EventContentFields.ROOM_NAME) - if room_name_stripped_event is not None - else None - ) - - else: - # No strip state provided - sliding_sync_insert_map["has_known_state"] = False - sliding_sync_insert_map["room_type"] = None - sliding_sync_insert_map["room_name"] = None - sliding_sync_insert_map["is_encrypted"] = False - else: - # No strip state provided - sliding_sync_insert_map["has_known_state"] = False - sliding_sync_insert_map["room_type"] = None - sliding_sync_insert_map["room_name"] = None - sliding_sync_insert_map["is_encrypted"] = False - - return sliding_sync_insert_map - def _handle_event_relations( self, txn: LoggingTransaction, event: EventBase ) -> None: From c89d859c7c0281a708591666eb7ed66ba02dc1b1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Aug 2024 23:52:01 -0500 Subject: [PATCH 055/142] Fill in docstrings --- synapse/storage/databases/main/events.py | 12 +++++++++--- synapse/storage/schema/__init__.py | 3 ++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 7697dfe4454..fb669a73116 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1565,7 +1565,10 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( Fetch the current state event IDs for the relevant (to the `sliding_sync_joined_rooms` table) state types for the given room. - TODO + Returns: + StateMap of event IDs necessary to to fetch the relevant state values needed + to insert into the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots`. """ # Fetch the current state event IDs from the database ( @@ -1597,7 +1600,9 @@ def _get_sliding_sync_insert_values_from_state_map_txn( cls, txn: LoggingTransaction, state_map: StateMap[str] ) -> Dict[str, Optional[Union[str, bool]]]: """ - TODO + Fetch events in the `state_map` and extract the relevant state values needed to + insert into the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` + tables. Returns: Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant @@ -1661,7 +1666,8 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn( cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any ) -> Dict[str, Optional[Union[str, bool]]]: """ - TODO + Pull out the relevant state values from the stripped state needed to insert into + the `sliding_sync_membership_snapshots` tables. 
Returns: Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 82790024399..316541d8180 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -144,7 +144,8 @@ - Add a column `authenticated` to the tables `local_media_repository` and `remote_media_cache` Changes in SCHEMA_VERSION = 87 - - TODO + - Add tables to store Sliding Sync data for quick filtering/sorting + (`sliding_sync_joined_rooms`, `sliding_sync_membership_snapshots`) """ From d2f5247e770c6717fbb98d34c9d513f950b4304f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 16 Aug 2024 00:15:03 -0500 Subject: [PATCH 056/142] Update comment --- synapse/storage/databases/main/events.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fb669a73116..b566332869b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1261,8 +1261,9 @@ def _update_current_state_txn( # rejects the invite (leaves the room), we will end up here. # # In these cases, we should inherit the meta data from the previous - # snapshot. When using sliding sync filters, this will prevent the - # room from disappearing/appearing just because you left the room. + # snapshot (handled by the default `ON CONFLICT ... DO UPDATE SET`). + # When using sliding sync filters, this will prevent the room from + # disappearing/appearing just because you left the room. # # Ideally, we could additionally assert that we're only here for # valid non-join membership transitions. From 98fb56e5fefc95563d2a42a591570e97f7a86d77 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 19 Aug 2024 22:30:50 -0500 Subject: [PATCH 057/142] Prefer `_update_sliding_sync_tables_with_new_persisted_events_txn(...)` to do the right thing See https://github.com/element-hq/synapse/pull/17512#discussion_r1719992152 --- synapse/storage/databases/main/events.py | 28 ++---------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b566332869b..fa63ca7cf8b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1417,18 +1417,11 @@ def _update_current_state_txn( # persisting stack (see # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # - bump_event_id = None current_state_map = {} for state_key, event_id in to_insert.items(): if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: current_state_map[state_key] = event_id - if ( - state_key[0] in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES - and state_key[1] == "" - ): - bump_event_id = event_id - # Map of values to insert/update in the `sliding_sync_joined_rooms` table sliding_sync_joined_rooms_insert_map = ( self._get_sliding_sync_insert_values_from_state_map_txn( @@ -1465,37 +1458,20 @@ def _update_current_state_txn( # just to account for things changing in the future. 
next(iter(to_insert.values())), ] - # If we have a `bump_event_id`, let's update the `bump_stamp` column - bump_stamp_column = "" - bump_stamp_values_clause = "" - if bump_event_id is not None: - bump_stamp_column = "bump_stamp, " - bump_stamp_values_clause = ( - "(SELECT stream_ordering FROM events WHERE event_id = ?)," - ) - args.append(bump_event_id) args.extend(iter(insert_values)) # We don't update `event_stream_ordering` `ON CONFLICT` because it's # simpler and we can just rely on # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to do - # the right thing. - # - # We don't update `bump_stamp` `ON CONFLICT` because we're dealing with - # state here and the only state event that is also a bump event type is - # `m.room.create`. Given the room creation event is the first one in the - # room, it's either going to be set on insert, or we've already moved on - # to other events with a greater `stream_ordering`/`bump_stamp` and we - # don't need to even try. + # the right thing (same for `bump_stamp`). txn.execute( f""" INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {bump_stamp_column} {", ".join(insert_keys)}) + (room_id, event_stream_ordering, {", ".join(insert_keys)}) VALUES ( ?, (SELECT stream_ordering FROM events WHERE event_id = ?), - {bump_stamp_values_clause} {", ".join("?" for _ in insert_values)} ) ON CONFLICT (room_id) From 8ee2e114dd0228d62fb48758cf8dbfa02f5aae4e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 19 Aug 2024 23:22:24 -0500 Subject: [PATCH 058/142] Add test to handle state reset in the meta data --- synapse/storage/databases/main/events.py | 86 +++++++---- tests/storage/test_events.py | 183 ++++++++++++++++++++++- 2 files changed, 235 insertions(+), 34 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fa63ca7cf8b..10865b12748 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1445,41 +1445,61 @@ def _update_current_state_txn( insert_values = sliding_sync_joined_rooms_insert_map.values() # We only need to update when one of the relevant state values has changed if insert_keys: - args: List[Any] = [ - room_id, - # Even though `Mapping`/`Dict` have no guaranteed order, some - # implementations may preserve insertion order so we're just going to - # choose the best possible answer by using the "first" event ID which we - # will assume will have the greatest `stream_ordering`. We really just - # need *some* answer in case we are the first ones inserting into the - # table and in reality, - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run - # after this function to update it to the correct latest value. This is - # just to account for things changing in the future. - next(iter(to_insert.values())), - ] - - args.extend(iter(insert_values)) + # If we have some `to_insert` values, we can use the standard upsert + # pattern because we have access to an `event_id` to use for the + # `event_stream_ordering` which has a `NON NULL` constraint. + if to_insert: + args: List[Any] = [ + room_id, + # Even though `Mapping`/`Dict` have no guaranteed order, some + # implementations may preserve insertion order so we're just + # going to choose the best possible answer by using the "first" + # event ID which we will assume will have the greatest + # `stream_ordering`. 
We really just need *some* answer in case + # we are the first ones inserting into the table because of the + # `NON NULL` constraint on `event_stream_ordering`. In reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` + # is run after this function to update it to the correct latest + # value. + next(iter(to_insert.values())), + ] + + args.extend(iter(insert_values)) + + # We don't update `event_stream_ordering` `ON CONFLICT` because it's + # simpler and we can just rely on + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to do + # the right thing (same for `bump_stamp`). + txn.execute( + f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, {", ".join(insert_keys)}) + VALUES ( + ?, + (SELECT stream_ordering FROM events WHERE event_id = ?), + {", ".join("?" for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO UPDATE SET + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """, + args, + ) - # We don't update `event_stream_ordering` `ON CONFLICT` because it's - # simpler and we can just rely on - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to do - # the right thing (same for `bump_stamp`). - txn.execute( - f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {", ".join(insert_keys)}) - VALUES ( - ?, - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in insert_values)} + # If there are only values `to_delete`, we have to use an `UPDATE` + # instead because there is no `event_id` to use for the `NON NULL` + # constraint on `event_stream_ordering`. + elif to_delete: + args = list(insert_values) + [room_id] + txn.execute( + f""" + UPDATE sliding_sync_joined_rooms + SET + {", ".join(f"{key} = ?" for key in insert_keys)} + WHERE room_id = ? + """, + args, ) - ON CONFLICT (room_id) - DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} - """, - args, - ) # We now update `local_current_membership`. We do this regardless # of whether we're still in the room or not to handle the case where diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 83149a09213..bb59603d33a 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -35,10 +35,12 @@ from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.state.bg_updates import _BackgroundUpdates from synapse.types import StateMap from synapse.util import Clock +from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -540,6 +542,9 @@ class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.storage_controllers = hs.get_storage_controllers() + persist_events_store = self.hs.get_datastores().persist_events + assert persist_events_store is not None + self.persist_events_store = persist_events_store def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResult]: """ @@ -1313,7 +1318,183 @@ def test_joined_room_is_bumped(self) -> None: user2_snapshot, ) - # TODO: test_joined_room_state_reset + def test_joined_room_meta_state_reset(self) -> None: + """ + Test that a state reset on the room name is reflected in the + `sliding_sync_joined_rooms` table. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Make sure we see the new room name + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined (no room + # name when the room creator joined) + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Mock a state reset removing the room name state from the current state + message_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[state_map[(EventTypes.Name, "")].event_id], + auth_event_ids=[ + state_map[(EventTypes.Create, "")].event_id, + state_map[(EventTypes.Member, user1_id)].event_id, + ], + type=EventTypes.Message, + content={"body": "foo", "msgtype": "m.text"}, + sender=user1_id, + room_id=room_id, + room_version=RoomVersions.V10.identifier, + ) + ) + event_chunk = [message_tuple] + self.get_success( + self.persist_events_store._persist_events_and_state_updates( + room_id, + event_chunk, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. 
+ to_delete=[(EventTypes.Name, "")], + to_insert={}, + ), + new_forward_extremities={message_tuple[0].event_id}, + use_negative_stream_ordering=False, + inhibit_local_membership_updates=False, + new_event_links={}, + ) + ) + + # Make sure the state reset is reflected in the `sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=message_tuple[ + 0 + ].internal_metadata.stream_ordering, + bump_stamp=message_tuple[0].internal_metadata.stream_ordering, + room_type=None, + # This was state reset back to None + room_name=None, + is_encrypted=False, + ), + ) + + # State reset shouldn't be reflected in the `sliding_sync_membership_snapshots` + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Snapshots haven't changed + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) def test_non_join_space_room_with_info(self) -> None: """ From 574a04a40f138db199edefd7a79235a1b1ee5d20 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 19 Aug 2024 23:30:25 -0500 Subject: [PATCH 059/142] Test state reset on membership --- tests/storage/test_events.py | 172 ++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 1 deletion(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index bb59603d33a..f6f81ea9549 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2559,7 +2559,177 @@ def test_non_join_retracted_remote_invite(self) -> None: ), ) - # TODO: test_non_join_state_reset + def test_non_join_state_reset(self) -> None: + """ + Test a state reset that removes someone from the room. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Make sure we see the new room name + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined (no room + # name when the room creator joined) + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Mock a state reset removing the membership for user1 in the current state + message_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[state_map[(EventTypes.Name, "")].event_id], + auth_event_ids=[ + state_map[(EventTypes.Create, "")].event_id, + state_map[(EventTypes.Member, user1_id)].event_id, + ], + type=EventTypes.Message, + content={"body": "foo", "msgtype": "m.text"}, + sender=user1_id, + room_id=room_id, + room_version=RoomVersions.V10.identifier, + ) + ) + event_chunk = [message_tuple] + self.get_success( + self.persist_events_store._persist_events_and_state_updates( + room_id, + event_chunk, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. 
+ to_delete=[(EventTypes.Member, user1_id)], + to_insert={}, + ), + new_forward_extremities={message_tuple[0].event_id}, + use_negative_stream_ordering=False, + inhibit_local_membership_updates=False, + new_event_links={}, + ) + ) + + # State reset on membership doesn't affect the`sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=message_tuple[ + 0 + ].internal_metadata.stream_ordering, + bump_stamp=message_tuple[0].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + ), + ) + + # State reset on membership should remove the user's snapshot + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # We shouldn't see user1 in the snapshots table anymore + (room_id, user2_id), + }, + exact=True, + ) + # Snapshot for user2 hasn't changed + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) def test_joined_background_update_missing(self) -> None: """ From 6cc6bdbedfb892a204cb202226d5d9fad8d49e45 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 11:10:34 -0500 Subject: [PATCH 060/142] Start of moving logic outside of the transaction (pre-process) --- synapse/storage/controllers/persist_events.py | 163 ++++++++++++++- synapse/storage/databases/main/events.py | 185 ++++++++---------- synapse/storage/databases/state/bg_updates.py | 6 +- 3 files changed, 250 insertions(+), 104 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index d0e015bf19c..9e30cec028f 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -49,7 +49,7 @@ from twisted.internet import defer -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, EventContentFields from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME @@ -64,8 +64,16 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases -from synapse.storage.databases.main.events import DeltaState +from synapse.storage.databases.main.events import ( + DeltaState, + SlidingSyncTableChanges, + SlidingSyncStateInsertValues, + SlidingSyncSnapshotInsertValues, +) from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.storage.databases.main.events import ( + SLIDING_SYNC_RELEVANT_STATE_SET, +) from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -604,6 +612,7 @@ async def _persist_event_batch( new_forward_extremities = None state_delta_for_room = None + sliding_sync_table_changes = None if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): @@ -617,6 +626,13 @@ async 
def _persist_event_batch( room_id, chunk ) + with Measure(self._clock, "_calculate_sliding_sync_table_changes"): + sliding_sync_table_changes = ( + await self._calculate_sliding_sync_table_changes( + room_id, chunk, state_delta_for_room + ) + ) + with Measure(self._clock, "calculate_chain_cover_index_for_events"): # We now calculate chain ID/sequence numbers for any state events we're # persisting. We ignore out of band memberships as we're not in the room @@ -636,6 +652,7 @@ async def _persist_event_batch( use_negative_stream_ordering=backfilled, inhibit_local_membership_updates=backfilled, new_event_links=new_event_links, + sliding_sync_table_changes=sliding_sync_table_changes, ) return replaced_events @@ -751,6 +768,148 @@ async def _calculate_new_forward_extremities_and_state_delta( return (new_forward_extremities, delta) + async def _calculate_sliding_sync_table_changes( + self, + room_id: str, + events_and_contexts: List[Tuple[EventBase, EventContext]], + delta_state: Optional[DeltaState], + ) -> Optional[SlidingSyncTableChanges]: + """ + TODO + """ + to_insert = delta_state.to_insert + to_delete = delta_state.to_delete + + # This would only happen if someone was state reset out of the room + to_delete_membership_snapshots = { + (room_id, state_key) + for event_type, state_key in to_delete + if event_type == EventTypes.Member and self.is_mine_id(state_key) + } + + membership_snapshot_updates = {} + if to_insert: + membership_event_id_to_user_id_map: Dict[str, str] = {} + for state_key, event_id in to_insert.items(): + if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): + membership_event_id_to_user_id_map[event_id] = state_key[1] + + if len(membership_event_id_to_user_id_map) > 0: + current_state_ids_map = ( + await self.main_store.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + ) + ) + # Since we fetched the current state before we took `to_insert`/`to_delete` + # into account, we need to do a couple fixups. + # + # Update the current_state_map with what we have `to_delete` + for state_key in to_delete: + current_state_ids_map.pop(state_key, None) + # Update the current_state_map with what we have `to_insert` + for state_key, event_id in to_insert.items(): + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: + current_state_ids_map[state_key] = event_id + + event_map = await self.main_store.get_events( + current_state_ids_map.values() + ) + + current_state_map = {} + for key, event_id in current_state_ids_map.items(): + event = event_map.get(event_id) + if event: + current_state_map[key] = event + + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_values = None + has_known_state = False + if current_state_map: + sliding_sync_insert_values = ( + self._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) + ) + # We have current state to work from + has_known_state = True + else: + # We don't have any `current_state_events` anymore (previously + # cleared out because of `no_longer_in_room`). This can happen if + # one user is joined and another is invited (some non-join + # membership). If the joined user leaves, we are `no_longer_in_room` + # and `current_state_events` is cleared out. When the invited user + # rejects the invite (leaves the room), we will end up here. + # + # In these cases, we should inherit the meta data from the previous + # snapshot (handled by the default `ON CONFLICT ... DO UPDATE SET`). 
+ # When using sliding sync filters, this will prevent the room from + # disappearing/appearing just because you left the room. + # + # Ideally, we could additionally assert that we're only here for + # valid non-join membership transitions. + assert delta_state.no_longer_in_room + + membership_snapshot_updates = { + (room_id, user_id): SlidingSyncSnapshotInsertValues( + membership_event_id=membership_event_id, + has_known_state=has_known_state, + ) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + } + + return SlidingSyncTableChanges( + joined_room_updates=TODO, + to_delete_joined_rooms=TODO, + membership_snapshot_updates=membership_snapshot_updates, + to_delete_membership_snapshots=to_delete_membership_snapshots, + ) + + @classmethod + def _get_sliding_sync_insert_values_from_state_map( + cls, state_map: StateMap[EventBase] + ) -> SlidingSyncStateInsertValues: + """ + Extract the relevant state values from the `state_map` needed to insert into the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + + Returns: + Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant + state values needed to insert into + the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + """ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + + # Parse the raw event JSON + for state_key, event in state_map.items(): + if state_key == (EventTypes.Create, ""): + room_type = event.content.get(EventContentFields.ROOM_TYPE) + sliding_sync_insert_map["room_type"] = room_type + elif state_key == (EventTypes.RoomEncryption, ""): + encryption_algorithm = event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is_encrypted = encryption_algorithm is not None + sliding_sync_insert_map["is_encrypted"] = is_encrypted + elif state_key == (EventTypes.Name, ""): + room_name = event.content.get(EventContentFields.ROOM_NAME) + sliding_sync_insert_map["room_name"] = room_name + else: + # We only expect to see events according to the + # `SLIDING_SYNC_RELEVANT_STATE_SET`. 
+ raise AssertionError( + f"Unexpected event (we should not be fetching extra events): {state_key} {event.event_id}" + ) + + return SlidingSyncStateInsertValues( + room_type=room_type, + is_encrypted=encryption_algorithm, + room_name=room_name, + ) + async def _calculate_new_extremities( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 10865b12748..9022701e309 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -125,6 +125,33 @@ def is_noop(self) -> bool: return not self.to_delete and not self.to_insert and not self.no_longer_in_room +@attr.s(slots=True, auto_attribs=True) +class SlidingSyncStateInsertValues: + room_type: Optional[str] + is_encrypted: Optional[bool] + room_name: Optional[str] + + +@attr.s(slots=True, auto_attribs=True) +class SlidingSyncSnapshotInsertValues(SlidingSyncStateInsertValues): + membership_event_id: str + has_known_state: Optional[bool] + # TODO: `sender` + + +@attr.s(slots=True, auto_attribs=True) +class SlidingSyncTableChanges: + # room_id -> dict to upsert into `sliding_sync_joined_rooms` + joined_room_updates: Dict[str, SlidingSyncStateInsertValues] + # room_ids to delete from `sliding_sync_joined_rooms` + to_delete_joined_rooms: StrCollection + + # (room_id, user_id) -> dict to upsert into sliding_sync_membership_snapshots + membership_snapshot_updates: Dict[Tuple[str, str], SlidingSyncSnapshotInsertValues] + # List of (room_id, user_id) to delete from `sliding_sync_membership_snapshots` + to_delete_membership_snapshots: Set[Tuple[str, str]] + + @attr.s(slots=True, auto_attribs=True) class NewEventChainLinks: """Information about new auth chain links that need to be added to the DB. @@ -193,6 +220,7 @@ async def _persist_events_and_state_updates( new_event_links: Dict[str, NewEventChainLinks], use_negative_stream_ordering: bool = False, inhibit_local_membership_updates: bool = False, + sliding_sync_table_changes: Optional[SlidingSyncTableChanges], ) -> None: """Persist a set of events alongside updates to the current state and forward extremities tables. @@ -213,6 +241,7 @@ async def _persist_events_and_state_updates( from being updated by these events. This should be set to True for backfilled events because backfilled events in the past do not affect the current local state. + sliding_sync_table_changes: TODO Returns: Resolves when the events have been persisted @@ -261,6 +290,7 @@ async def _persist_events_and_state_updates( state_delta_for_room=state_delta_for_room, new_forward_extremities=new_forward_extremities, new_event_links=new_event_links, + sliding_sync_table_changes=sliding_sync_table_changes, ) persist_event_counter.inc(len(events_and_contexts)) @@ -484,6 +514,7 @@ def _persist_events_txn( state_delta_for_room: Optional[DeltaState], new_forward_extremities: Optional[Set[str]], new_event_links: Dict[str, NewEventChainLinks], + sliding_sync_table_changes: Optional[SlidingSyncTableChanges], ) -> None: """Insert some number of room events into the necessary database tables. @@ -507,6 +538,7 @@ def _persist_events_txn( state_delta_for_room: The current-state delta for the room. new_forward_extremities: The new forward extremities for the room: a set of the event ids which are the forward extremities. 
+ sliding_sync_table_changes: TODO Raises: PartialStateConflictError: if attempting to persist a partial state event in @@ -617,7 +649,11 @@ def _persist_events_txn( # NB: This function invalidates all state related caches if state_delta_for_room: self._update_current_state_txn( - txn, room_id, state_delta_for_room, min_stream_order + txn, + room_id, + state_delta_for_room, + min_stream_order, + sliding_sync_table_changes, ) self._update_sliding_sync_tables_with_new_persisted_events_txn( @@ -1179,6 +1215,7 @@ def _update_current_state_txn( room_id: str, delta_state: DeltaState, stream_id: int, + sliding_sync_table_changes: Optional[SlidingSyncTableChanges], ) -> None: to_delete = delta_state.to_delete to_insert = delta_state.to_insert @@ -1197,15 +1234,11 @@ def _update_current_state_txn( # Handle updating the `sliding_sync_membership_snapshots` table # # This would only happen if someone was state reset out of the room - if to_delete: + if sliding_sync_table_changes.to_delete_membership_snapshots: txn.execute_batch( "DELETE FROM sliding_sync_membership_snapshots" " WHERE room_id = ? AND user_id = ?", - ( - (room_id, state_key) - for event_type, state_key in to_delete - if event_type == EventTypes.Member and self.is_mine_id(state_key) - ), + sliding_sync_table_changes.to_delete_membership_snapshots, ) # We handle `sliding_sync_membership_snapshots` before `current_state_events` so @@ -1215,99 +1248,53 @@ def _update_current_state_txn( # We do this regardless of whether the server is `no_longer_in_room` or not # because we still want a row if a local user was just left/kicked or got banned # from the room. - if to_insert: - membership_event_id_to_user_id_map: Dict[str, str] = {} - for state_key, event_id in to_insert.items(): - if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): - membership_event_id_to_user_id_map[event_id] = state_key[1] + if sliding_sync_table_changes.membership_snapshot_updates: - if len(membership_event_id_to_user_id_map) > 0: - current_state_map = ( - self._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id - ) - ) - # Since we fetched the current state before we took `to_insert`/`to_delete` - # into account, we need to do a couple fixups. - # - # Update the current_state_map with what we have `to_delete` - for state_key in to_delete: - current_state_map.pop(state_key, None) - # Update the current_state_map with what we have `to_insert` - for state_key, event_id in to_insert.items(): - if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: - current_state_map[state_key] = event_id - - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} - if current_state_map: - sliding_sync_membership_snapshots_insert_map = ( - self._get_sliding_sync_insert_values_from_state_map_txn( - txn, current_state_map - ) - ) - # We have current state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( - True - ) - else: - # We don't have any `current_state_events` anymore (previously - # cleared out because of `no_longer_in_room`). This can happen if - # one user is joined and another is invited (some non-join - # membership). If the joined user leaves, we are `no_longer_in_room` - # and `current_state_events` is cleared out. When the invited user - # rejects the invite (leaves the room), we will end up here. 
- # - # In these cases, we should inherit the meta data from the previous - # snapshot (handled by the default `ON CONFLICT ... DO UPDATE SET`). - # When using sliding sync filters, this will prevent the room from - # disappearing/appearing just because you left the room. - # - # Ideally, we could additionally assert that we're only here for - # valid non-join membership transitions. - assert delta_state.no_longer_in_room + # TODO + [ + getattr(x, attr_name) + for attr_name in ["room_type", "is_encrypted", "room_name"] + ] - # Update the `sliding_sync_membership_snapshots` table - # - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = sliding_sync_membership_snapshots_insert_map.keys() - insert_values = sliding_sync_membership_snapshots_insert_map.values() - # We need to insert/update regardless of whether we have `insert_keys` - # because there are other fields in the `ON CONFLICT` upsert to run (see - # inherit case above for more context when this happens). - txn.execute_batch( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) - {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO UPDATE SET - membership_event_id = EXCLUDED.membership_event_id, - membership = EXCLUDED.membership, - event_stream_ordering = EXCLUDED.event_stream_ordering - {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} - """, - [ - [ - room_id, - user_id, - membership_event_id, - membership_event_id, - membership_event_id, - ] - + list(insert_values) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ], + # Update the `sliding_sync_membership_snapshots` table + # + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_membership_snapshots_insert_map.keys() + insert_values = sliding_sync_membership_snapshots_insert_map.values() + # We need to insert/update regardless of whether we have `insert_keys` + # because there are other fields in the `ON CONFLICT` upsert to run (see + # inherit case above for more context when this happens). + txn.execute_batch( + f""" + INSERT INTO sliding_sync_membership_snapshots + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {("," + ", ".join(insert_keys)) if insert_keys else ""}) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} ) + ON CONFLICT (room_id, user_id) + DO UPDATE SET + membership_event_id = EXCLUDED.membership_event_id, + membership = EXCLUDED.membership, + event_stream_ordering = EXCLUDED.event_stream_ordering + {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} + """, + [ + [ + room_id, + user_id, + membership_event_id, + membership_event_id, + membership_event_id, + ] + + list(insert_values) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + ], + ) if delta_state.no_longer_in_room: # Server is no longer in the room so we delete the room from @@ -1424,7 +1411,7 @@ def _update_current_state_txn( # Map of values to insert/update in the `sliding_sync_joined_rooms` table sliding_sync_joined_rooms_insert_map = ( - self._get_sliding_sync_insert_values_from_state_map_txn( + self._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, current_state_map ) ) @@ -1593,7 +1580,7 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( return current_state_map @classmethod - def _get_sliding_sync_insert_values_from_state_map_txn( + def _get_sliding_sync_insert_values_from_state_ids_map_txn( cls, txn: LoggingTransaction, state_map: StateMap[str] ) -> Dict[str, Optional[Union[str, bool]]]: """ diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 244dedae3d2..526bf7ea62d 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -621,7 +621,7 @@ def _txn(txn: LoggingTransaction) -> int: # so we should have some current state for each room assert current_state_map - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, current_state_map ) # We should have some insert values for each room, even if they are `None` @@ -754,7 +754,7 @@ def _txn(txn: LoggingTransaction) -> int: # for each room assert current_state_map - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, current_state_map ) # We should have some insert values for each room, even if they are `None` @@ -854,7 +854,7 @@ def _txn(txn: LoggingTransaction) -> int: ) state_map = state_by_group[state_group] - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_map_txn( + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, state_map ) # We should have some insert values for each room, even if they are `None` From 95d39db772049cba416ec87da22fe967635937bf Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 11:55:24 -0500 Subject: [PATCH 061/142] Closer types --- synapse/storage/controllers/persist_events.py | 34 +++++++++++---- synapse/storage/databases/main/events.py | 43 ++++++++++++++----- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 9e30cec028f..0bc60077fbf 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -68,7 +68,8 @@ 
DeltaState, SlidingSyncTableChanges, SlidingSyncStateInsertValues, - SlidingSyncSnapshotInsertValues, + SlidingSyncMembershipSnapshotSharedInsertValues, + SlidingSyncMembershipInfo, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.databases.main.events import ( @@ -781,20 +782,31 @@ async def _calculate_sliding_sync_table_changes( to_delete = delta_state.to_delete # This would only happen if someone was state reset out of the room - to_delete_membership_snapshots = { - (room_id, state_key) + to_delete_membership_snapshots = [ + state_key for event_type, state_key in to_delete if event_type == EventTypes.Member and self.is_mine_id(state_key) - } + ] membership_snapshot_updates = {} + membership_infos: List[SlidingSyncMembershipInfo] = [] if to_insert: membership_event_id_to_user_id_map: Dict[str, str] = {} for state_key, event_id in to_insert.items(): if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - if len(membership_event_id_to_user_id_map) > 0: + event_id_to_sender_map = await _get_sender_for_event_ids(membership_event_id_to_user_id_map.keys()) + membership_infos = [ + SlidingSyncMembershipInfo( + user_id=user_id, + sender=event_id_to_sender_map[event_id], + membership_event_id=membership_event_id + ) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + ] + + if membership_infos: current_state_ids_map = ( await self.main_store.get_partial_filtered_current_state_ids( room_id, @@ -825,16 +837,16 @@ async def _calculate_sliding_sync_table_changes( current_state_map[key] = event # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_values = None + membership_snapshot_shared_insert_values = SlidingSyncMembershipSnapshotSharedInsertValues() has_known_state = False if current_state_map: - sliding_sync_insert_values = ( + state_insert_values = ( self._get_sliding_sync_insert_values_from_state_map( current_state_map ) ) # We have current state to work from - has_known_state = True + membership_snapshot_shared_insert_values.has_known_state = True else: # We don't have any `current_state_events` anymore (previously # cleared out because of `no_longer_in_room`). This can happen if @@ -861,9 +873,13 @@ async def _calculate_sliding_sync_table_changes( } return SlidingSyncTableChanges( + room_id=room_id, + # For `sliding_sync_joined_rooms` joined_room_updates=TODO, to_delete_joined_rooms=TODO, - membership_snapshot_updates=membership_snapshot_updates, + # For `sliding_sync_membership_snapshots` + membership_snapshot_shared_insert_values=TODO, + to_insert_membership_snapshots=membership_infos, to_delete_membership_snapshots=to_delete_membership_snapshots, ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9022701e309..1c6eecff94f 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -127,29 +127,48 @@ def is_noop(self) -> bool: @attr.s(slots=True, auto_attribs=True) class SlidingSyncStateInsertValues: + """ + Insert values relevant for the `sliding_sync_joined_rooms` and + `sliding_sync_membership_snapshots` database tables. 
+ """ room_type: Optional[str] is_encrypted: Optional[bool] room_name: Optional[str] @attr.s(slots=True, auto_attribs=True) -class SlidingSyncSnapshotInsertValues(SlidingSyncStateInsertValues): - membership_event_id: str - has_known_state: Optional[bool] - # TODO: `sender` +class SlidingSyncMembershipSnapshotSharedInsertValues(SlidingSyncStateInsertValues): + """ + Insert values for `sliding_sync_membership_snapshots` that we can share across + multiple memberships + """ + has_known_state: bool + # TODO: tombstone_successor_room_id: Optional[str] +@attr.s(slots=True, auto_attribs=True) +class SlidingSyncMembershipInfo(SlidingSyncStateInsertValues): + """ + Values unique to each membership + """ + user_id: str + sender: str + membership_event_id: str @attr.s(slots=True, auto_attribs=True) class SlidingSyncTableChanges: + room_id: str # room_id -> dict to upsert into `sliding_sync_joined_rooms` joined_room_updates: Dict[str, SlidingSyncStateInsertValues] # room_ids to delete from `sliding_sync_joined_rooms` to_delete_joined_rooms: StrCollection - # (room_id, user_id) -> dict to upsert into sliding_sync_membership_snapshots - membership_snapshot_updates: Dict[Tuple[str, str], SlidingSyncSnapshotInsertValues] - # List of (room_id, user_id) to delete from `sliding_sync_membership_snapshots` - to_delete_membership_snapshots: Set[Tuple[str, str]] + # Shared values to upsert into `sliding_sync_membership_snapshots` for each + # `to_insert_membership_snapshots` + membership_snapshot_shared_insert_values: SlidingSyncMembershipSnapshotSharedInsertValues + # List of membership to insert into `sliding_sync_membership_snapshots` + to_insert_membership_snapshots: List[SlidingSyncMembershipInfo] + # List of user_id to delete from `sliding_sync_membership_snapshots` + to_delete_membership_snapshots: List[str] @attr.s(slots=True, auto_attribs=True) @@ -1251,10 +1270,12 @@ def _update_current_state_txn( if sliding_sync_table_changes.membership_snapshot_updates: # TODO - [ - getattr(x, attr_name) + for asdf in sliding_sync_table_changes.membership_snapshot_updates: for attr_name in ["room_type", "is_encrypted", "room_name"] - ] + [ + getattr(x, attr_name) + + ] # Update the `sliding_sync_membership_snapshots` table # From 2964c567d3cd53a34219aec5e59691ab7e965f8d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 13:21:19 -0500 Subject: [PATCH 062/142] Use dicts --- synapse/storage/controllers/persist_events.py | 134 ++++++----- synapse/storage/databases/main/events.py | 208 ++++++++++-------- 2 files changed, 194 insertions(+), 148 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 0bc60077fbf..68f3bb0b94c 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -30,6 +30,7 @@ Awaitable, Callable, ClassVar, + Sequence, Collection, Deque, Dict, @@ -49,7 +50,7 @@ from twisted.internet import defer -from synapse.api.constants import EventTypes, Membership, EventContentFields +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME @@ -65,20 +66,19 @@ from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import ( + SLIDING_SYNC_RELEVANT_STATE_SET, DeltaState, - SlidingSyncTableChanges, 
- SlidingSyncStateInsertValues, - SlidingSyncMembershipSnapshotSharedInsertValues, SlidingSyncMembershipInfo, + SlidingSyncMembershipSnapshotSharedInsertValues, + SlidingSyncStateInsertValues, + SlidingSyncTableChanges, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.databases.main.events import ( - SLIDING_SYNC_RELEVANT_STATE_SET, -) from synapse.types import ( PersistedEventPosition, RoomStreamToken, StateMap, + MutableStateMap, get_domain_from_id, ) from synapse.types.state import StateFilter @@ -511,8 +511,13 @@ async def _update_current_state( """ state = await self._calculate_current_state(room_id) delta = await self._calculate_state_delta(room_id, state) + sliding_sync_table_changes = await self._calculate_sliding_sync_table_changes( + room_id, [], delta + ) - await self.persist_events_store.update_current_state(room_id, delta) + await self.persist_events_store.update_current_state( + room_id, delta, sliding_sync_table_changes + ) async def _calculate_current_state(self, room_id: str) -> StateMap[str]: """Calculate the current state of a room, based on the forward extremities @@ -627,12 +632,13 @@ async def _persist_event_batch( room_id, chunk ) - with Measure(self._clock, "_calculate_sliding_sync_table_changes"): - sliding_sync_table_changes = ( - await self._calculate_sliding_sync_table_changes( - room_id, chunk, state_delta_for_room + if state_delta_for_room is not None: + with Measure(self._clock, "_calculate_sliding_sync_table_changes"): + sliding_sync_table_changes = ( + await self._calculate_sliding_sync_table_changes( + room_id, chunk, state_delta_for_room + ) ) - ) with Measure(self._clock, "calculate_chain_cover_index_for_events"): # We now calculate chain ID/sequence numbers for any state events we're @@ -772,15 +778,26 @@ async def _calculate_new_forward_extremities_and_state_delta( async def _calculate_sliding_sync_table_changes( self, room_id: str, - events_and_contexts: List[Tuple[EventBase, EventContext]], - delta_state: Optional[DeltaState], - ) -> Optional[SlidingSyncTableChanges]: + events_and_contexts: Sequence[Tuple[EventBase, EventContext]], + delta_state: DeltaState, + ) -> SlidingSyncTableChanges: """ TODO + + Args: + room_id: The room ID currently being processed. + events_and_contexts: List of tuples of (event, context) being persisted. + This is completely optional (you can pass an empty list) and will just + save us from fetching the events from the database if we already have + them. + delta_state: Deltas that are going to be used to update the + `current_state_events` table. 
""" to_insert = delta_state.to_insert to_delete = delta_state.to_delete + event_map = {event.event_id: event for event, _ in events_and_contexts} + # This would only happen if someone was state reset out of the room to_delete_membership_snapshots = [ state_key @@ -788,7 +805,9 @@ async def _calculate_sliding_sync_table_changes( if event_type == EventTypes.Member and self.is_mine_id(state_key) ] - membership_snapshot_updates = {} + membership_snapshot_shared_insert_values: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) = {} membership_infos: List[SlidingSyncMembershipInfo] = [] if to_insert: membership_event_id_to_user_id_map: Dict[str, str] = {} @@ -796,18 +815,39 @@ async def _calculate_sliding_sync_table_changes( if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - event_id_to_sender_map = await _get_sender_for_event_ids(membership_event_id_to_user_id_map.keys()) - membership_infos = [ - SlidingSyncMembershipInfo( - user_id=user_id, - sender=event_id_to_sender_map[event_id], - membership_event_id=membership_event_id + event_id_to_sender_map: Dict[str, str] = {} + # In normal event persist scenarios, we should be able to find the + # membership events in the `events_and_contexts` given to us but it's + # possible a state reset happened which added us to the room without a + # corresponding new membership event (reset back to a previous membership). + missing_membership_event_ids: Set[str] = set() + for membership_event_id in membership_event_id_to_user_id_map.keys(): + membership_event = event_map.get(membership_event_id) + if membership_event: + event_id_to_sender_map[membership_event_id] = ( + membership_event.sender + ) + else: + missing_membership_event_ids.add(membership_event_id) + + # Otherwise, we need to find a couple previous events that we were reset to. 
+ if missing_membership_event_ids: + remaining_event_id_to_sender_map = await _get_sender_for_event_ids( + missing_membership_event_ids ) + event_id_to_sender_map.update(remaining_event_id_to_sender_map) + + membership_infos = [ + { + "user_id": user_id, + "sender": event_id_to_sender_map[event_id], + "membership_event_id": membership_event_id, + } for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ] if membership_infos: - current_state_ids_map = ( + current_state_ids_map: MutableStateMap = dict( await self.main_store.get_partial_filtered_current_state_ids( room_id, state_filter=StateFilter.from_types( @@ -826,27 +866,25 @@ async def _calculate_sliding_sync_table_changes( if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: current_state_ids_map[state_key] = event_id - event_map = await self.main_store.get_events( + fetched_events = await self.main_store.get_events( current_state_ids_map.values() ) - current_state_map = {} - for key, event_id in current_state_ids_map.items(): - event = event_map.get(event_id) - if event: - current_state_map[key] = event + current_state_map: StateMap[EventBase] = { + key: fetched_events[event_id] + for key, event_id in current_state_ids_map.items() + } # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - membership_snapshot_shared_insert_values = SlidingSyncMembershipSnapshotSharedInsertValues() - has_known_state = False if current_state_map: state_insert_values = ( self._get_sliding_sync_insert_values_from_state_map( current_state_map ) ) + membership_snapshot_shared_insert_values.update(state_insert_values) # We have current state to work from - membership_snapshot_shared_insert_values.has_known_state = True + membership_snapshot_shared_insert_values["has_known_state"] = True else: # We don't have any `current_state_events` anymore (previously # cleared out because of `no_longer_in_room`). This can happen if @@ -856,29 +894,21 @@ async def _calculate_sliding_sync_table_changes( # rejects the invite (leaves the room), we will end up here. # # In these cases, we should inherit the meta data from the previous - # snapshot (handled by the default `ON CONFLICT ... DO UPDATE SET`). - # When using sliding sync filters, this will prevent the room from + # snapshot so we shouldn't update any of the state values. When + # using sliding sync filters, this will prevent the room from # disappearing/appearing just because you left the room. # # Ideally, we could additionally assert that we're only here for # valid non-join membership transitions. assert delta_state.no_longer_in_room - membership_snapshot_updates = { - (room_id, user_id): SlidingSyncSnapshotInsertValues( - membership_event_id=membership_event_id, - has_known_state=has_known_state, - ) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - } - return SlidingSyncTableChanges( room_id=room_id, # For `sliding_sync_joined_rooms` joined_room_updates=TODO, to_delete_joined_rooms=TODO, # For `sliding_sync_membership_snapshots` - membership_snapshot_shared_insert_values=TODO, + membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, to_insert_membership_snapshots=membership_infos, to_delete_membership_snapshots=to_delete_membership_snapshots, ) @@ -897,13 +927,15 @@ def _get_sliding_sync_insert_values_from_state_map( the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
""" # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + sliding_sync_insert_map: SlidingSyncStateInsertValues = {} # Parse the raw event JSON for state_key, event in state_map.items(): if state_key == (EventTypes.Create, ""): room_type = event.content.get(EventContentFields.ROOM_TYPE) - sliding_sync_insert_map["room_type"] = room_type + # Scrutinize JSON values + if room_type is None or isinstance(room_type, str): + sliding_sync_insert_map["room_type"] = room_type elif state_key == (EventTypes.RoomEncryption, ""): encryption_algorithm = event.content.get( EventContentFields.ENCRYPTION_ALGORITHM @@ -912,7 +944,9 @@ def _get_sliding_sync_insert_values_from_state_map( sliding_sync_insert_map["is_encrypted"] = is_encrypted elif state_key == (EventTypes.Name, ""): room_name = event.content.get(EventContentFields.ROOM_NAME) - sliding_sync_insert_map["room_name"] = room_name + # Scrutinize JSON values + if room_name is None or isinstance(room_name, str): + sliding_sync_insert_map["room_name"] = room_name else: # We only expect to see events according to the # `SLIDING_SYNC_RELEVANT_STATE_SET`. @@ -920,11 +954,7 @@ def _get_sliding_sync_insert_values_from_state_map( f"Unexpected event (we should not be fetching extra events): {state_key} {event.event_id}" ) - return SlidingSyncStateInsertValues( - room_type=room_type, - is_encrypted=encryption_algorithm, - room_name=room_name, - ) + return sliding_sync_insert_map async def _calculate_new_extremities( self, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1c6eecff94f..277df93459f 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -31,6 +31,7 @@ Generator, Iterable, List, + Literal, Optional, Set, Tuple, @@ -125,34 +126,51 @@ def is_noop(self) -> bool: return not self.to_delete and not self.to_insert and not self.no_longer_in_room -@attr.s(slots=True, auto_attribs=True) -class SlidingSyncStateInsertValues: - """ - Insert values relevant for the `sliding_sync_joined_rooms` and - `sliding_sync_membership_snapshots` database tables. - """ - room_type: Optional[str] - is_encrypted: Optional[bool] - room_name: Optional[str] - +# @attr.s(slots=True, auto_attribs=True) +# class SlidingSyncStateInsertValues: +# """ +# Insert values relevant for the `sliding_sync_joined_rooms` and +# `sliding_sync_membership_snapshots` database tables. 
+# """ +# room_type: Optional[str] +# is_encrypted: Optional[bool] +# room_name: Optional[str] + +SlidingSyncStateInsertKeys = Literal["room_type", "is_encrypted", "room_name"] +SlidingSyncStateInsertValues = Dict[ + SlidingSyncStateInsertKeys, Optional[Union[str, bool]] +] + + +# @attr.s(slots=True, auto_attribs=True) +# class SlidingSyncMembershipSnapshotSharedInsertValues(SlidingSyncStateInsertValues): +# """ +# Insert values for `sliding_sync_membership_snapshots` that we can share across +# multiple memberships +# """ +# has_known_state: bool +# # TODO: tombstone_successor_room_id: Optional[str] + +SlidingSyncMembershipSnapshotSharedInsertValues = Dict[ + # Instead of using a Union, we use a Literal to be compatible with mypy + # Literal[SlidingSyncStateInsertKeys, "has_known_state"], + Union[SlidingSyncStateInsertKeys, Literal["has_known_state"]], + Optional[Union[str, bool]], +] + +# @attr.s(slots=True, auto_attribs=True) +# class SlidingSyncMembershipInfo(SlidingSyncStateInsertValues): +# """ +# Values unique to each membership +# """ +# user_id: str +# sender: str +# membership_event_id: str + +SlidingSyncMembershipInfo = Dict[ + Literal["user_id", "sender", "membership_event_id"], Optional[Union[str, bool]] +] -@attr.s(slots=True, auto_attribs=True) -class SlidingSyncMembershipSnapshotSharedInsertValues(SlidingSyncStateInsertValues): - """ - Insert values for `sliding_sync_membership_snapshots` that we can share across - multiple memberships - """ - has_known_state: bool - # TODO: tombstone_successor_room_id: Optional[str] - -@attr.s(slots=True, auto_attribs=True) -class SlidingSyncMembershipInfo(SlidingSyncStateInsertValues): - """ - Values unique to each membership - """ - user_id: str - sender: str - membership_event_id: str @attr.s(slots=True, auto_attribs=True) class SlidingSyncTableChanges: @@ -164,7 +182,9 @@ class SlidingSyncTableChanges: # Shared values to upsert into `sliding_sync_membership_snapshots` for each # `to_insert_membership_snapshots` - membership_snapshot_shared_insert_values: SlidingSyncMembershipSnapshotSharedInsertValues + membership_snapshot_shared_insert_values: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) # List of membership to insert into `sliding_sync_membership_snapshots` to_insert_membership_snapshots: List[SlidingSyncMembershipInfo] # List of user_id to delete from `sliding_sync_membership_snapshots` @@ -667,6 +687,9 @@ def _persist_events_txn( # room_memberships, where applicable. 
# NB: This function invalidates all state related caches if state_delta_for_room: + # If the state delta exists, the sliding sync table changes should also exist + assert sliding_sync_table_changes is not None + self._update_current_state_txn( txn, room_id, @@ -1213,6 +1236,7 @@ async def update_current_state( self, room_id: str, state_delta: DeltaState, + sliding_sync_table_changes: SlidingSyncTableChanges, ) -> None: """Update the current state stored in the datatabase for the given room""" @@ -1226,6 +1250,7 @@ async def update_current_state( room_id, delta_state=state_delta, stream_id=stream_ordering, + sliding_sync_table_changes=sliding_sync_table_changes, ) def _update_current_state_txn( @@ -1234,7 +1259,7 @@ def _update_current_state_txn( room_id: str, delta_state: DeltaState, stream_id: int, - sliding_sync_table_changes: Optional[SlidingSyncTableChanges], + sliding_sync_table_changes: SlidingSyncTableChanges, ) -> None: to_delete = delta_state.to_delete to_insert = delta_state.to_insert @@ -1250,73 +1275,6 @@ def _update_current_state_txn( if ev_type == EventTypes.Member } - # Handle updating the `sliding_sync_membership_snapshots` table - # - # This would only happen if someone was state reset out of the room - if sliding_sync_table_changes.to_delete_membership_snapshots: - txn.execute_batch( - "DELETE FROM sliding_sync_membership_snapshots" - " WHERE room_id = ? AND user_id = ?", - sliding_sync_table_changes.to_delete_membership_snapshots, - ) - - # We handle `sliding_sync_membership_snapshots` before `current_state_events` so - # we can gather the current state before it might be deleted if we are - # last ones in the room and now we are `no_longer_in_room`. - # - # We do this regardless of whether the server is `no_longer_in_room` or not - # because we still want a row if a local user was just left/kicked or got banned - # from the room. - if sliding_sync_table_changes.membership_snapshot_updates: - - # TODO - for asdf in sliding_sync_table_changes.membership_snapshot_updates: - for attr_name in ["room_type", "is_encrypted", "room_name"] - [ - getattr(x, attr_name) - - ] - - # Update the `sliding_sync_membership_snapshots` table - # - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = sliding_sync_membership_snapshots_insert_map.keys() - insert_values = sliding_sync_membership_snapshots_insert_map.values() - # We need to insert/update regardless of whether we have `insert_keys` - # because there are other fields in the `ON CONFLICT` upsert to run (see - # inherit case above for more context when this happens). - txn.execute_batch( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) - {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO UPDATE SET - membership_event_id = EXCLUDED.membership_event_id, - membership = EXCLUDED.membership, - event_stream_ordering = EXCLUDED.event_stream_ordering - {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} - """, - [ - [ - room_id, - user_id, - membership_event_id, - membership_event_id, - membership_event_id, - ] - + list(insert_values) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ], - ) - if delta_state.no_longer_in_room: # Server is no longer in the room so we delete the room from # current_state_events, being careful we've already updated the @@ -1545,6 +1503,64 @@ def _update_current_state_txn( ], ) + # Handle updating the `sliding_sync_membership_snapshots` table + # + # This would only happen if someone was state reset out of the room + if sliding_sync_table_changes.to_delete_membership_snapshots: + txn.execute_batch( + "DELETE FROM sliding_sync_membership_snapshots" + " WHERE room_id = ? AND user_id = ?", + sliding_sync_table_changes.to_delete_membership_snapshots, + ) + + # We do this regardless of whether the server is `no_longer_in_room` or not + # because we still want a row if a local user was just left/kicked or got banned + # from the room. + if sliding_sync_table_changes.to_insert_membership_snapshots: + # Update the `sliding_sync_membership_snapshots` table + # + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = ( + sliding_sync_table_changes.membership_snapshot_shared_insert_values.keys() + ) + insert_values = ( + sliding_sync_table_changes.membership_snapshot_shared_insert_values.values() + ) + # We need to insert/update regardless of whether we have `insert_keys` + # because there are other fields in the `ON CONFLICT` upsert to run (see + # inherit case above for more context when this happens). + txn.execute_batch( + f""" + INSERT INTO sliding_sync_membership_snapshots + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {("," + ", ".join(insert_keys)) if insert_keys else ""}) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} + ) + ON CONFLICT (room_id, user_id) + DO UPDATE SET + membership_event_id = EXCLUDED.membership_event_id, + membership = EXCLUDED.membership, + event_stream_ordering = EXCLUDED.event_stream_ordering + {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} + """, + [ + [ + room_id, + membership_info["user_id"], + membership_info["membership_event_id"], + membership_info["membership_event_id"], + membership_info["membership_event_id"], + ] + + list(insert_values) + for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots + ], + ) + txn.call_after( self.store._curr_state_delta_stream_cache.entity_has_changed, room_id, From ac5b05c86ba3b9eda24c57747f0ae67f53b5a444 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 13:29:56 -0500 Subject: [PATCH 063/142] Use `TypedDict` --- synapse/storage/databases/main/events.py | 76 ++++++++++-------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 277df93459f..7fdf21566d4 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -38,6 +38,7 @@ Union, cast, ) +from typing_extensions import TypedDict import attr from prometheus_client import Counter @@ -126,50 +127,37 @@ def is_noop(self) -> bool: return not self.to_delete and not self.to_insert and not self.no_longer_in_room -# @attr.s(slots=True, auto_attribs=True) -# class SlidingSyncStateInsertValues: -# """ -# Insert values relevant for the `sliding_sync_joined_rooms` and -# `sliding_sync_membership_snapshots` database tables. -# """ -# room_type: Optional[str] -# is_encrypted: Optional[bool] -# room_name: Optional[str] - -SlidingSyncStateInsertKeys = Literal["room_type", "is_encrypted", "room_name"] -SlidingSyncStateInsertValues = Dict[ - SlidingSyncStateInsertKeys, Optional[Union[str, bool]] -] - - -# @attr.s(slots=True, auto_attribs=True) -# class SlidingSyncMembershipSnapshotSharedInsertValues(SlidingSyncStateInsertValues): -# """ -# Insert values for `sliding_sync_membership_snapshots` that we can share across -# multiple memberships -# """ -# has_known_state: bool -# # TODO: tombstone_successor_room_id: Optional[str] - -SlidingSyncMembershipSnapshotSharedInsertValues = Dict[ - # Instead of using a Union, we use a Literal to be compatible with mypy - # Literal[SlidingSyncStateInsertKeys, "has_known_state"], - Union[SlidingSyncStateInsertKeys, Literal["has_known_state"]], - Optional[Union[str, bool]], -] - -# @attr.s(slots=True, auto_attribs=True) -# class SlidingSyncMembershipInfo(SlidingSyncStateInsertValues): -# """ -# Values unique to each membership -# """ -# user_id: str -# sender: str -# membership_event_id: str - -SlidingSyncMembershipInfo = Dict[ - Literal["user_id", "sender", "membership_event_id"], Optional[Union[str, bool]] -] +class SlidingSyncStateInsertValues(TypedDict, total=False): + """ + Insert values relevant for the `sliding_sync_joined_rooms` and + `sliding_sync_membership_snapshots` database tables. 
+ """ + + room_type: Optional[str] + is_encrypted: Optional[bool] + room_name: Optional[str] + + +class SlidingSyncMembershipSnapshotSharedInsertValues( + SlidingSyncStateInsertValues, total=False +): + """ + Insert values for `sliding_sync_membership_snapshots` that we can share across + multiple memberships + """ + + has_known_state: Optional[bool] + # TODO: tombstone_successor_room_id: Optional[str] + + +class SlidingSyncMembershipInfo(TypedDict, total=False): + """ + Values unique to each membership + """ + + user_id: str + sender: str + membership_event_id: str @attr.s(slots=True, auto_attribs=True) From 45c89ec625817ac635c35d9b0af3cdfc33b7b80b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 15:41:46 -0500 Subject: [PATCH 064/142] Move pre-processing completely outside transaction --- synapse/storage/controllers/persist_events.py | 99 +++++++++++++++---- synapse/storage/databases/main/events.py | 48 +++------ .../storage/databases/main/events_worker.py | 30 +++++- tests/storage/test_events.py | 36 +++++-- 4 files changed, 151 insertions(+), 62 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 68f3bb0b94c..f21f5edb194 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -30,7 +30,6 @@ Awaitable, Callable, ClassVar, - Sequence, Collection, Deque, Dict, @@ -39,6 +38,7 @@ Iterable, List, Optional, + Sequence, Set, Tuple, TypeVar, @@ -75,10 +75,10 @@ ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( + MutableStateMap, PersistedEventPosition, RoomStreamToken, StateMap, - MutableStateMap, get_domain_from_id, ) from synapse.types.state import StateFilter @@ -782,7 +782,13 @@ async def _calculate_sliding_sync_table_changes( delta_state: DeltaState, ) -> SlidingSyncTableChanges: """ - TODO + Calculate the changes to the `sliding_sync_membership_snapshots` and + `sliding_sync_joined_rooms` tables given the deltas that are going to be used to + update the `current_state_events` table. + + Just a bunch of pre-processing so we so we don't need to spend time in the + transaction itself gathering all of this info. It's also easier to deal with + redactions outside of a transaction. Args: room_id: The room ID currently being processed. @@ -798,8 +804,10 @@ async def _calculate_sliding_sync_table_changes( event_map = {event.event_id: event for event, _ in events_and_contexts} + # Handle gathering info for the `sliding_sync_membership_snapshots` table + # # This would only happen if someone was state reset out of the room - to_delete_membership_snapshots = [ + user_ids_to_delete_membership_snapshots = [ state_key for event_type, state_key in to_delete if event_type == EventTypes.Member and self.is_mine_id(state_key) @@ -808,7 +816,9 @@ async def _calculate_sliding_sync_table_changes( membership_snapshot_shared_insert_values: ( SlidingSyncMembershipSnapshotSharedInsertValues ) = {} - membership_infos: List[SlidingSyncMembershipInfo] = [] + membership_infos_to_insert_membership_snapshots: List[ + SlidingSyncMembershipInfo + ] = [] if to_insert: membership_event_id_to_user_id_map: Dict[str, str] = {} for state_key, event_id in to_insert.items(): @@ -830,14 +840,16 @@ async def _calculate_sliding_sync_table_changes( else: missing_membership_event_ids.add(membership_event_id) - # Otherwise, we need to find a couple previous events that we were reset to. 
+ # Otherwise, we need to find a couple events that we were reset to. if missing_membership_event_ids: - remaining_event_id_to_sender_map = await _get_sender_for_event_ids( - missing_membership_event_ids + remaining_event_id_to_sender_map = ( + await self.main_store.get_sender_for_event_ids( + missing_membership_event_ids + ) ) event_id_to_sender_map.update(remaining_event_id_to_sender_map) - membership_infos = [ + membership_infos_to_insert_membership_snapshots = [ { "user_id": user_id, "sender": event_id_to_sender_map[event_id], @@ -846,7 +858,7 @@ async def _calculate_sliding_sync_table_changes( for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ] - if membership_infos: + if membership_infos_to_insert_membership_snapshots: current_state_ids_map: MutableStateMap = dict( await self.main_store.get_partial_filtered_current_state_ids( room_id, @@ -871,11 +883,10 @@ async def _calculate_sliding_sync_table_changes( ) current_state_map: StateMap[EventBase] = { - key: fetched_events[event_id] - for key, event_id in current_state_ids_map.items() + state_key: fetched_events[event_id] + for state_key, event_id in current_state_ids_map.items() } - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table if current_state_map: state_insert_values = ( self._get_sliding_sync_insert_values_from_state_map( @@ -902,15 +913,69 @@ async def _calculate_sliding_sync_table_changes( # valid non-join membership transitions. assert delta_state.no_longer_in_room + # Handle gathering info for the `sliding_sync_joined_rooms` table + # + # We only deal with + # updating the state related columns. The + # `event_stream_ordering`/`bump_stamp` are updated elsewhere in the event + # persisting stack (see + # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) + # + joined_room_updates: SlidingSyncStateInsertValues = {} + if not delta_state.no_longer_in_room: + # Look through the items we're going to insert into the current state to see + # if there is anything that we care about and should also update in the + # `sliding_sync_joined_rooms` table. + current_state_ids_map = {} + for state_key, event_id in to_insert.items(): + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: + current_state_ids_map[state_key] = event_id + + # Get the full event objects for the current state events + # + # In normal event persist scenarios, we should be able to find the state + # events in the `events_and_contexts` given to us but it's possible a state + # reset happened which that reset back to a previous state. + current_state_map = {} + missing_event_ids: Set[str] = set() + for state_key, event_id in current_state_ids_map.items(): + event = event_map.get(event_id) + if event: + current_state_map[state_key] = event + else: + missing_event_ids.add(membership_event_id) + + # Otherwise, we need to find a couple events that we were reset to. 
+ if missing_event_ids: + remaining_events = await self.main_store.get_events( + current_state_ids_map.values() + ) + # There shouldn't be any missing events + assert remaining_events.keys() == missing_event_ids + for event in remaining_events.values(): + current_state_map[(event.type, event.state_key)] = event + + joined_room_updates = self._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) + + # If something is being deleted from the state, we need to clear it out + for state_key in to_delete: + if state_key == (EventTypes.Create, ""): + joined_room_updates["room_type"] = None + elif state_key == (EventTypes.RoomEncryption, ""): + joined_room_updates["is_encrypted"] = False + elif state_key == (EventTypes.Name, ""): + joined_room_updates["room_name"] = None + return SlidingSyncTableChanges( room_id=room_id, # For `sliding_sync_joined_rooms` - joined_room_updates=TODO, - to_delete_joined_rooms=TODO, + joined_room_updates=joined_room_updates, # For `sliding_sync_membership_snapshots` membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, - to_insert_membership_snapshots=membership_infos, - to_delete_membership_snapshots=to_delete_membership_snapshots, + to_insert_membership_snapshots=membership_infos_to_insert_membership_snapshots, + to_delete_membership_snapshots=user_ids_to_delete_membership_snapshots, ) @classmethod diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 7fdf21566d4..96a0033eaf7 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -31,17 +31,16 @@ Generator, Iterable, List, - Literal, Optional, Set, Tuple, Union, cast, ) -from typing_extensions import TypedDict import attr from prometheus_client import Counter +from typing_extensions import TypedDict import synapse.metrics from synapse.api.constants import ( @@ -127,6 +126,7 @@ def is_noop(self) -> bool: return not self.to_delete and not self.to_insert and not self.no_longer_in_room +# We want `total=False` because we want to allow values to be unset. 
class SlidingSyncStateInsertValues(TypedDict, total=False): """ Insert values relevant for the `sliding_sync_joined_rooms` and @@ -163,10 +163,8 @@ class SlidingSyncMembershipInfo(TypedDict, total=False): @attr.s(slots=True, auto_attribs=True) class SlidingSyncTableChanges: room_id: str - # room_id -> dict to upsert into `sliding_sync_joined_rooms` - joined_room_updates: Dict[str, SlidingSyncStateInsertValues] - # room_ids to delete from `sliding_sync_joined_rooms` - to_delete_joined_rooms: StrCollection + # Values to upsert into `sliding_sync_joined_rooms` + joined_room_updates: SlidingSyncStateInsertValues # Shared values to upsert into `sliding_sync_membership_snapshots` for each # `to_insert_membership_snapshots` @@ -1371,32 +1369,9 @@ def _update_current_state_txn( # persisting stack (see # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # - current_state_map = {} - for state_key, event_id in to_insert.items(): - if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: - current_state_map[state_key] = event_id - - # Map of values to insert/update in the `sliding_sync_joined_rooms` table - sliding_sync_joined_rooms_insert_map = ( - self._get_sliding_sync_insert_values_from_state_ids_map_txn( - txn, current_state_map - ) - ) - - # If something is being deleted from the state, we need to clear it out - for state_key in to_delete: - if state_key == (EventTypes.Create, ""): - sliding_sync_joined_rooms_insert_map["room_type"] = None - elif state_key == (EventTypes.RoomEncryption, ""): - sliding_sync_joined_rooms_insert_map["is_encrypted"] = False - elif state_key == (EventTypes.Name, ""): - sliding_sync_joined_rooms_insert_map["room_name"] = None - - # Update the `sliding_sync_joined_rooms` table - # # Pulling keys/values separately is safe and will produce congruent lists - insert_keys = sliding_sync_joined_rooms_insert_map.keys() - insert_values = sliding_sync_joined_rooms_insert_map.values() + insert_keys = sliding_sync_table_changes.joined_room_updates.keys() + insert_values = sliding_sync_table_changes.joined_room_updates.values() # We only need to update when one of the relevant state values has changed if insert_keys: # If we have some `to_insert` values, we can use the standard upsert @@ -1422,8 +1397,10 @@ def _update_current_state_txn( # We don't update `event_stream_ordering` `ON CONFLICT` because it's # simpler and we can just rely on - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to do - # the right thing (same for `bump_stamp`). + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to + # do the right thing (same for `bump_stamp`). The only reason we're + # inserting `event_stream_ordering` here is because the column has a + # `NON NULL` constraint and we need some answer. txn.execute( f""" INSERT INTO sliding_sync_joined_rooms @@ -1498,7 +1475,10 @@ def _update_current_state_txn( txn.execute_batch( "DELETE FROM sliding_sync_membership_snapshots" " WHERE room_id = ? 
AND user_id = ?", - sliding_sync_table_changes.to_delete_membership_snapshots, + [ + (room_id, user_id) + for user_id in sliding_sync_table_changes.to_delete_membership_snapshots + ], ) # We do this regardless of whether the server is `no_longer_in_room` or not diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 4d4877c4c3f..e77ece682fa 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -81,7 +81,7 @@ MultiWriterIdGenerator, ) from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import JsonDict, get_domain_from_id +from synapse.types import JsonDict, StrCollection, get_domain_from_id from synapse.types.state import StateFilter from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation @@ -1979,6 +1979,34 @@ async def get_event_ordering(self, event_id: str, room_id: str) -> Tuple[int, in return int(res[0]), int(res[1]) + async def get_sender_for_event_ids( + self, event_ids: StrCollection + ) -> Mapping[str, str]: + """ + Get the sender for a list of event IDs. + + Args: + event_ids: The event IDs to look up. + + Returns: + A mapping from event ID to event sender. + """ + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=( + "event_id", + "sender", + ), + desc="get_sender_for_event_ids", + ), + ) + + return dict(rows) + async def get_next_event_to_expire(self) -> Optional[Tuple[str, int]]: """Retrieve the entry with the lowest expiry timestamp in the event_expiry table, or None if there's no more event to expire. diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index f6f81ea9549..3d3f95f29ca 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1432,19 +1432,27 @@ def test_joined_room_meta_state_reset(self) -> None: ) ) event_chunk = [message_tuple] + delta_state = DeltaState( + # This is the state reset part. We're removing the room name state. + to_delete=[(EventTypes.Name, "")], + to_insert={}, + ) + assert self.storage_controllers.persistence is not None + sliding_sync_table_changes = self.get_success( + self.storage_controllers.persistence._calculate_sliding_sync_table_changes( + room_id, event_chunk, delta_state + ) + ) self.get_success( self.persist_events_store._persist_events_and_state_updates( room_id, event_chunk, - state_delta_for_room=DeltaState( - # This is the state reset part. We're removing the room name state. - to_delete=[(EventTypes.Name, "")], - to_insert={}, - ), + state_delta_for_room=delta_state, new_forward_extremities={message_tuple[0].event_id}, use_negative_stream_ordering=False, inhibit_local_membership_updates=False, new_event_links={}, + sliding_sync_table_changes=sliding_sync_table_changes, ) ) @@ -2672,19 +2680,27 @@ def test_non_join_state_reset(self) -> None: ) ) event_chunk = [message_tuple] + delta_state = DeltaState( + # This is the state reset part. We're removing the room name state. 
+ to_delete=[(EventTypes.Member, user1_id)], + to_insert={}, + ) + assert self.storage_controllers.persistence is not None + sliding_sync_table_changes = self.get_success( + self.storage_controllers.persistence._calculate_sliding_sync_table_changes( + room_id, event_chunk, delta_state + ) + ) self.get_success( self.persist_events_store._persist_events_and_state_updates( room_id, event_chunk, - state_delta_for_room=DeltaState( - # This is the state reset part. We're removing the room name state. - to_delete=[(EventTypes.Member, user1_id)], - to_insert={}, - ), + state_delta_for_room=delta_state, new_forward_extremities={message_tuple[0].event_id}, use_negative_stream_ordering=False, inhibit_local_membership_updates=False, new_event_links={}, + sliding_sync_table_changes=sliding_sync_table_changes, ) ) From 3eb77c3a2a4259787bf37da9b3c45b3ac7cd0e62 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 17:24:43 -0500 Subject: [PATCH 065/142] Add sanity checks and fix wrong variable usage --- synapse/storage/controllers/persist_events.py | 15 ++++++++++++--- synapse/storage/databases/main/events.py | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index f21f5edb194..341e2192838 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -847,12 +847,19 @@ async def _calculate_sliding_sync_table_changes( missing_membership_event_ids ) ) + # There shouldn't be any missing events + assert ( + remaining_event_id_to_sender_map.keys() + == missing_membership_event_ids + ), missing_membership_event_ids.difference( + remaining_event_id_to_sender_map.keys() + ) event_id_to_sender_map.update(remaining_event_id_to_sender_map) membership_infos_to_insert_membership_snapshots = [ { "user_id": user_id, - "sender": event_id_to_sender_map[event_id], + "sender": event_id_to_sender_map[membership_event_id], "membership_event_id": membership_event_id, } for membership_event_id, user_id in membership_event_id_to_user_id_map.items() @@ -943,7 +950,7 @@ async def _calculate_sliding_sync_table_changes( if event: current_state_map[state_key] = event else: - missing_event_ids.add(membership_event_id) + missing_event_ids.add(event_id) # Otherwise, we need to find a couple events that we were reset to. if missing_event_ids: @@ -951,7 +958,9 @@ async def _calculate_sliding_sync_table_changes( current_state_ids_map.values() ) # There shouldn't be any missing events - assert remaining_events.keys() == missing_event_ids + assert ( + remaining_events.keys() == missing_event_ids + ), missing_event_ids.difference(remaining_events.keys()) for event in remaining_events.values(): current_state_map[(event.type, event.state_key)] = event diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 96a0033eaf7..28ba64261e8 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1250,6 +1250,9 @@ def _update_current_state_txn( to_delete = delta_state.to_delete to_insert = delta_state.to_insert + # Sanity check we're processing the same thing + assert room_id == sliding_sync_table_changes.room_id + # Figure out the changes of membership to invalidate the # `get_rooms_for_user` cache. 
# We find out which membership events we may have deleted From 726a8e96985acb44aae28cfea70a1ed45ff23813 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 21:48:44 -0500 Subject: [PATCH 066/142] Attempt getting real events in backgroun update (needs work) --- synapse/storage/controllers/persist_events.py | 36 +- synapse/storage/databases/main/events.py | 35 +- .../storage/databases/main/events_worker.py | 32 +- synapse/storage/databases/state/bg_updates.py | 314 +++++++++++++++++- 4 files changed, 348 insertions(+), 69 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 341e2192838..c231c9eeaa4 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -825,7 +825,7 @@ async def _calculate_sliding_sync_table_changes( if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - event_id_to_sender_map: Dict[str, str] = {} + membership_event_map: Dict[str, EventBase] = {} # In normal event persist scenarios, we should be able to find the # membership events in the `events_and_contexts` given to us but it's # possible a state reset happened which added us to the room without a @@ -834,39 +834,34 @@ async def _calculate_sliding_sync_table_changes( for membership_event_id in membership_event_id_to_user_id_map.keys(): membership_event = event_map.get(membership_event_id) if membership_event: - event_id_to_sender_map[membership_event_id] = ( - membership_event.sender - ) + membership_event_map[membership_event_id] = membership_event else: missing_membership_event_ids.add(membership_event_id) # Otherwise, we need to find a couple events that we were reset to. if missing_membership_event_ids: - remaining_event_id_to_sender_map = ( - await self.main_store.get_sender_for_event_ids( - missing_membership_event_ids - ) + remaining_events = await self.main_store.get_events( + missing_membership_event_ids ) # There shouldn't be any missing events assert ( - remaining_event_id_to_sender_map.keys() - == missing_membership_event_ids - ), missing_membership_event_ids.difference( - remaining_event_id_to_sender_map.keys() - ) - event_id_to_sender_map.update(remaining_event_id_to_sender_map) + remaining_events.keys() == missing_membership_event_ids + ), missing_membership_event_ids.difference(remaining_events.keys()) + membership_event_map.update(remaining_events) membership_infos_to_insert_membership_snapshots = [ - { - "user_id": user_id, - "sender": event_id_to_sender_map[membership_event_id], - "membership_event_id": membership_event_id, - } + SlidingSyncMembershipInfo( + user_id=user_id, + sender=membership_event_map[membership_event_id].sender, + membership_event_id=membership_event_id, + membership=membership_event_map[membership_event_id].membership, + membership_event_stream_ordering=None, + ) for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ] if membership_infos_to_insert_membership_snapshots: - current_state_ids_map: MutableStateMap = dict( + current_state_ids_map: MutableStateMap[str] = dict( await self.main_store.get_partial_filtered_current_state_ids( room_id, state_filter=StateFilter.from_types( @@ -987,6 +982,7 @@ async def _calculate_sliding_sync_table_changes( to_delete_membership_snapshots=user_ids_to_delete_membership_snapshots, ) + # TODO: Should we put this next to the other `_get_sliding_sync_*` functions? 
@classmethod def _get_sliding_sync_insert_values_from_state_map( cls, state_map: StateMap[EventBase] diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 28ba64261e8..f813d48519a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -34,7 +34,6 @@ Optional, Set, Tuple, - Union, cast, ) @@ -150,7 +149,8 @@ class SlidingSyncMembershipSnapshotSharedInsertValues( # TODO: tombstone_successor_room_id: Optional[str] -class SlidingSyncMembershipInfo(TypedDict, total=False): +@attr.s(slots=True, auto_attribs=True) +class SlidingSyncMembershipInfo: """ Values unique to each membership """ @@ -158,6 +158,9 @@ class SlidingSyncMembershipInfo(TypedDict, total=False): user_id: str sender: str membership_event_id: str + membership: str + # Sometimes we're working with events that aren't persisted yet + membership_event_stream_ordering: Optional[int] @attr.s(slots=True, auto_attribs=True) @@ -1507,8 +1510,7 @@ def _update_current_state_txn( (room_id, user_id, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), + ?, ?, ?, ?, (SELECT stream_ordering FROM events WHERE event_id = ?) {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) @@ -1522,10 +1524,10 @@ def _update_current_state_txn( [ [ room_id, - membership_info["user_id"], - membership_info["membership_event_id"], - membership_info["membership_event_id"], - membership_info["membership_event_id"], + membership_info.user_id, + membership_info.membership_event_id, + membership_info.membership, + membership_info.membership_event_id, ] + list(insert_values) for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots @@ -1549,6 +1551,8 @@ def _update_current_state_txn( txn, {m for m in members_to_cache_bust if not self.hs.is_mine_id(m)} ) + # TODO: We can probably remove this function in favor of other stuff. + # TODO: This doesn't take into account redactions @classmethod def _get_relevant_sliding_sync_current_state_event_ids_txn( cls, txn: LoggingTransaction, room_id: str @@ -1587,10 +1591,12 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( return current_state_map + # TODO: We can probably remove this function in favor of other stuff. + # TODO: Should we put this next to the other `_get_sliding_sync_*` function? @classmethod def _get_sliding_sync_insert_values_from_state_ids_map_txn( cls, txn: LoggingTransaction, state_map: StateMap[str] - ) -> Dict[str, Optional[Union[str, bool]]]: + ) -> SlidingSyncStateInsertValues: """ Fetch events in the `state_map` and extract the relevant state values needed to insert into the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` @@ -1602,7 +1608,7 @@ def _get_sliding_sync_insert_values_from_state_ids_map_txn( the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
""" # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + sliding_sync_insert_map: SlidingSyncStateInsertValues = {} # Fetch the raw event JSON from the database ( event_id_in_list_clause, @@ -1644,19 +1650,18 @@ def _get_sliding_sync_insert_values_from_state_ids_map_txn( sliding_sync_insert_map["room_name"] = room_name else: # We only expect to see events according to the - # `SLIDING_SYNC_RELEVANT_STATE_SET` which is what will - # `_get_relevant_sliding_sync_current_state_event_ids_txn()` will - # return. + # `SLIDING_SYNC_RELEVANT_STATE_SET`. raise AssertionError( f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" ) return sliding_sync_insert_map + # TODO: Should we put this next to the other `_get_sliding_sync_*` function? @classmethod def _get_sliding_sync_insert_values_from_stripped_state_txn( cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any - ) -> Dict[str, Optional[Union[str, bool]]]: + ) -> SlidingSyncMembershipSnapshotSharedInsertValues: """ Pull out the relevant state values from the stripped state needed to insert into the `sliding_sync_membership_snapshots` tables. @@ -1666,7 +1671,7 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn( state values needed to insert into the `sliding_sync_membership_snapshots` tables. """ # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: Dict[str, Optional[Union[str, bool]]] = {} + sliding_sync_insert_map: SlidingSyncMembershipSnapshotSharedInsertValues = {} if unsigned_stripped_state_events is not None: stripped_state_map: MutableStateMap[StrippedStateEvent] = {} diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e77ece682fa..cf24d845547 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -81,7 +81,7 @@ MultiWriterIdGenerator, ) from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import JsonDict, StrCollection, get_domain_from_id +from synapse.types import JsonDict, get_domain_from_id from synapse.types.state import StateFilter from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation @@ -511,6 +511,8 @@ async def get_events( ) -> Dict[str, EventBase]: """Get events from the database + Unknown events will be omitted from the response. + Args: event_ids: The event_ids of the events to fetch @@ -1979,34 +1981,6 @@ async def get_event_ordering(self, event_id: str, room_id: str) -> Tuple[int, in return int(res[0]), int(res[1]) - async def get_sender_for_event_ids( - self, event_ids: StrCollection - ) -> Mapping[str, str]: - """ - Get the sender for a list of event IDs. - - Args: - event_ids: The event IDs to look up. - - Returns: - A mapping from event ID to event sender. - """ - rows = cast( - List[Tuple[str, str]], - await self.db_pool.simple_select_many_batch( - table="events", - column="event_id", - iterable=event_ids, - retcols=( - "event_id", - "sender", - ), - desc="get_sender_for_event_ids", - ), - ) - - return dict(rows) - async def get_next_event_to_expire(self) -> Optional[Tuple[str, int]]: """Retrieve the entry with the lowest expiry timestamp in the event_expiry table, or None if there's no more event to expire. 
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 526bf7ea62d..3b55c528cee 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -35,6 +35,7 @@ from synapse.storage.databases.main.events import ( SLIDING_SYNC_RELEVANT_STATE_SET, PersistEventsStore, + SlidingSyncMembershipSnapshotSharedInsertValues, ) from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.types import JsonDict, MutableStateMap, StateMap, StrCollection @@ -614,6 +615,7 @@ def _txn(txn: LoggingTransaction) -> int: return 0 for (room_id,) in rooms_to_update_rows: + # TODO: Handle redactions current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( txn, room_id ) @@ -741,9 +743,9 @@ def _txn(txn: LoggingTransaction) -> int: ) # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: Dict[ - str, Optional[Union[str, bool]] - ] = {} + sliding_sync_membership_snapshots_insert_map: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) = {} if membership == Membership.JOIN: # If we're still joined, we can pull from current state current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( @@ -754,9 +756,12 @@ def _txn(txn: LoggingTransaction) -> int: # for each room assert current_state_map - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( + state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, current_state_map ) + sliding_sync_membership_snapshots_insert_map.update( + state_insert_values + ) # We should have some insert values for each room, even if they are `None` assert sliding_sync_membership_snapshots_insert_map @@ -854,9 +859,12 @@ def _txn(txn: LoggingTransaction) -> int: ) state_map = state_by_group[state_group] - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( + state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( txn, state_map ) + sliding_sync_membership_snapshots_insert_map.update( + state_insert_values + ) # We should have some insert values for each room, even if they are `None` assert sliding_sync_membership_snapshots_insert_map @@ -922,3 +930,299 @@ def _txn(txn: LoggingTransaction) -> int: ) return count + + # async def _sliding_sync_membership_snapshots_backfill( + # self, progress: JsonDict, batch_size: int + # ) -> int: + # """ + # Handles backfilling the `sliding_sync_membership_snapshots` table. + # """ + # last_event_stream_ordering = progress.get( + # "last_event_stream_ordering", -(1 << 31) + # ) + + # def _find_memberships_to_update_txn( + # txn: LoggingTransaction, + # ) -> List[Tuple[str, str, str, str, str, int, bool]]: + # # Fetch the set of event IDs that we want to update + # txn.execute( + # """ + # SELECT + # c.room_id, + # c.user_id, + # e.sender + # c.event_id, + # c.membership, + # c.event_stream_ordering, + # e.outlier + # FROM local_current_membership as c + # INNER JOIN events AS e USING (event_id) + # WHERE event_stream_ordering > ? + # ORDER BY event_stream_ordering ASC + # LIMIT ? 
+ # """, + # (last_event_stream_ordering, batch_size), + # ) + + # memberships_to_update_rows = cast( + # List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() + # ) + + # return memberships_to_update_rows + + # memberships_to_update_rows = await self.db_pool.runInteraction( + # "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", + # _find_memberships_to_update_txn, + # ) + + # if not memberships_to_update_rows: + # await self.db_pool.updates._end_background_update( + # _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + # ) + + # store = self.hs.get_storage_controllers().main + + # def _find_previous_membership_txn( + # txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + # ) -> Tuple[str, str]: + # # Find the previous invite/knock event before the leave event + # txn.execute( + # """ + # SELECT event_id, membership + # FROM room_memberships + # WHERE + # room_id = ? + # AND user_id = ? + # AND event_stream_ordering < ? + # ORDER BY event_stream_ordering DESC + # LIMIT 1 + # """, + # ( + # room_id, + # user_id, + # stream_ordering, + # ), + # ) + # row = txn.fetchone() + + # # We should see a corresponding previous invite/knock event + # assert row is not None + # event_id, membership = row + + # return event_id, membership + + # # Map from (room_id, user_id) to ... + # to_insert_membership_snapshots: Dict[ + # Tuple[str, str], SlidingSyncMembershipSnapshotSharedInsertValues + # ] = {} + # to_insert_membership_infos: Dict[Tuple[str, str], SlidingSyncMembershipInfo] = ( + # {} + # ) + # for ( + # room_id, + # user_id, + # sender, + # membership_event_id, + # membership, + # membership_event_stream_ordering, + # is_outlier, + # ) in memberships_to_update_rows: + # # We don't know how to handle `membership` values other than these. The + # # code below would need to be updated. + # assert membership in ( + # Membership.JOIN, + # Membership.INVITE, + # Membership.KNOCK, + # Membership.LEAVE, + # Membership.BAN, + # ) + + # # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + # sliding_sync_membership_snapshots_insert_map: ( + # SlidingSyncMembershipSnapshotSharedInsertValues + # ) = {} + # if membership == Membership.JOIN: + # # If we're still joined, we can pull from current state. 
+ # current_state_ids_map: StateMap[str] = ( + # await store.get_partial_filtered_current_state_ids( + # room_id, + # state_filter=StateFilter.from_types( + # SLIDING_SYNC_RELEVANT_STATE_SET + # ), + # ) + # ) + # # We're iterating over rooms that we are joined to so they should + # # have `current_state_events` and we should have some current state + # # for each room + # assert current_state_ids_map + + # fetched_events = await store.get_events(current_state_ids_map.values()) + + # current_state_map: StateMap[EventBase] = { + # state_key: fetched_events[event_id] + # for state_key, event_id in current_state_ids_map.items() + # } + + # state_insert_values = EventsPersistenceStorageController._get_sliding_sync_insert_values_from_state_map( + # current_state_map + # ) + # sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # # We should have some insert values for each room, even if they are `None` + # assert sliding_sync_membership_snapshots_insert_map + + # # We have current state to work from + # sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + # elif membership in (Membership.INVITE, Membership.KNOCK) or ( + # membership == Membership.LEAVE and is_outlier + # ): + # invite_or_knock_event_id = membership_event_id + # invite_or_knock_membership = membership + + # # If the event is an `out_of_band_membership` (special case of + # # `outlier`), we never had historical state so we have to pull from + # # the stripped state on the previous invite/knock event. This gives + # # us a consistent view of the room state regardless of your + # # membership (i.e. the room shouldn't disappear if your using the + # # `is_encrypted` filter and you leave). + # if membership == Membership.LEAVE and is_outlier: + # invite_or_knock_event_id, invite_or_knock_membership = ( + # await self.db_pool.runInteraction( + # "sliding_sync_membership_snapshots_backfill._find_previous_membership", + # _find_previous_membership_txn, + # room_id, + # user_id, + # membership_event_stream_ordering, + # ) + # ) + + # # Pull from the stripped state on the invite/knock event + # invite_or_knock_event = await store.get_event(invite_or_knock_event_id) + + # raw_stripped_state_events = None + # if invite_or_knock_membership == Membership.INVITE: + # invite_room_state = invite_or_knock_event.unsigned.get( + # "invite_room_state" + # ) + # raw_stripped_state_events = invite_room_state + # elif invite_or_knock_membership == Membership.KNOCK: + # knock_room_state = invite_or_knock_event.unsigned.get( + # "knock_room_state" + # ) + # raw_stripped_state_events = knock_room_state + + # sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( + # "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", + # PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, + # raw_stripped_state_events, + # ) + + # # We should have some insert values for each room, even if no + # # stripped state is on the event because we still want to record + # # that we have no known state + # assert sliding_sync_membership_snapshots_insert_map + # elif membership in (Membership.LEAVE, Membership.BAN): + # # Pull from historical state + # state_group = await store._get_state_group_for_event( + # membership_event_id + # ) + # # We should know the state for the event + # assert state_group is not None + + # state_by_group = await self.db_pool.runInteraction( + # 
"sliding_sync_membership_snapshots_backfill._get_state_groups_from_groups_txn", + # self._get_state_groups_from_groups_txn, + # groups=[state_group], + # state_filter=StateFilter.from_types( + # SLIDING_SYNC_RELEVANT_STATE_SET + # ), + # ) + # state_ids_map = state_by_group[state_group] + + # fetched_events = await store.get_events(state_ids_map.values()) + + # state_map: StateMap[EventBase] = { + # state_key: fetched_events[event_id] + # for state_key, event_id in state_ids_map.items() + # } + + # state_insert_values = EventsPersistenceStorageController._get_sliding_sync_insert_values_from_state_map( + # state_map + # ) + # sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # # We should have some insert values for each room, even if they are `None` + # assert sliding_sync_membership_snapshots_insert_map + + # # We have historical state to work from + # sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + # else: + # assert_never(membership) + + # to_insert_membership_snapshots[(room_id, user_id)] = ( + # sliding_sync_membership_snapshots_insert_map + # ) + # to_insert_membership_infos[(room_id, user_id)] = SlidingSyncMembershipInfo( + # user_id=user_id, + # sender=sender, + # membership_event_id=membership_event_id, + # membership=membership, + # membership_event_stream_ordering=membership_event_stream_ordering, + # ) + + # def _backfill_table_txn(txn: LoggingTransaction) -> None: + # for key, insert_map in to_insert_membership_snapshots.items(): + # room_id, user_id = key + # membership_info = to_insert_membership_infos[key] + # membership_event_id = membership_info.membership_event_id + # membership = membership_info.membership + # membership_event_stream_ordering = ( + # membership_info.membership_event_stream_ordering + # ) + + # # Pulling keys/values separately is safe and will produce congruent + # # lists + # insert_keys = insert_map.keys() + # insert_values = insert_map.values() + # # We don't need to do anything `ON CONFLICT` because we never partially + # # insert/update the snapshots + # txn.execute( + # f""" + # INSERT INTO sliding_sync_membership_snapshots + # (room_id, user_id, membership_event_id, membership, event_stream_ordering + # {("," + ", ".join(insert_keys)) if insert_keys else ""}) + # VALUES ( + # ?, ?, ?, ?, ?, + # {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} + # ) + # ON CONFLICT (room_id, user_id) + # DO NOTHING + # """, + # [ + # room_id, + # user_id, + # membership_event_id, + # membership, + # membership_event_stream_ordering, + # ] + # + list(insert_values), + # ) + + # await self.db_pool.runInteraction( + # "sliding_sync_membership_snapshots_backfill", _backfill_table_txn + # ) + + # # Update the progress + # ( + # _room_id, + # _user_id, + # _sender, + # _membership_event_id, + # _membership, + # membership_event_stream_ordering, + # _is_outlier, + # ) = memberships_to_update_rows[-1] + # await self.db_pool.updates._background_update_progress( + # _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + # {"last_event_stream_ordering": membership_event_stream_ordering}, + # ) + + # return len(memberships_to_update_rows) From 357132db1d4216a1e74d7234f443c9a2b38c1687 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 20 Aug 2024 21:54:03 -0500 Subject: [PATCH 067/142] Go back to simpler fetching senders --- synapse/storage/controllers/persist_events.py | 25 ++++++++++------- synapse/storage/databases/main/events.py | 8 ++---- .../storage/databases/main/events_worker.py | 28 ++++++++++++++++++- synapse/storage/databases/state/bg_updates.py | 10 +++---- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index c231c9eeaa4..3054ea43fa4 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -825,7 +825,7 @@ async def _calculate_sliding_sync_table_changes( if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - membership_event_map: Dict[str, EventBase] = {} + event_id_to_sender_map: Dict[str, str] = {} # In normal event persist scenarios, we should be able to find the # membership events in the `events_and_contexts` given to us but it's # possible a state reset happened which added us to the room without a @@ -834,28 +834,33 @@ async def _calculate_sliding_sync_table_changes( for membership_event_id in membership_event_id_to_user_id_map.keys(): membership_event = event_map.get(membership_event_id) if membership_event: - membership_event_map[membership_event_id] = membership_event + event_id_to_sender_map[membership_event_id] = ( + membership_event.sender + ) else: missing_membership_event_ids.add(membership_event_id) # Otherwise, we need to find a couple events that we were reset to. 
if missing_membership_event_ids: - remaining_events = await self.main_store.get_events( - missing_membership_event_ids + remaining_event_id_to_sender_map = ( + await self.main_store.get_sender_for_event_ids( + missing_membership_event_ids + ) ) # There shouldn't be any missing events assert ( - remaining_events.keys() == missing_membership_event_ids - ), missing_membership_event_ids.difference(remaining_events.keys()) - membership_event_map.update(remaining_events) + remaining_event_id_to_sender_map.keys() + == missing_membership_event_ids + ), missing_membership_event_ids.difference( + remaining_event_id_to_sender_map.keys() + ) + event_id_to_sender_map.update(remaining_event_id_to_sender_map) membership_infos_to_insert_membership_snapshots = [ SlidingSyncMembershipInfo( user_id=user_id, - sender=membership_event_map[membership_event_id].sender, + sender=event_id_to_sender_map[membership_event_id], membership_event_id=membership_event_id, - membership=membership_event_map[membership_event_id].membership, - membership_event_stream_ordering=None, ) for membership_event_id, user_id in membership_event_id_to_user_id_map.items() ] diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f813d48519a..9d317048fe9 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -158,9 +158,6 @@ class SlidingSyncMembershipInfo: user_id: str sender: str membership_event_id: str - membership: str - # Sometimes we're working with events that aren't persisted yet - membership_event_stream_ordering: Optional[int] @attr.s(slots=True, auto_attribs=True) @@ -1510,7 +1507,8 @@ def _update_current_state_txn( (room_id, user_id, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, ?, + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), (SELECT stream_ordering FROM events WHERE event_id = ?) {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) @@ -1526,7 +1524,7 @@ def _update_current_state_txn( room_id, membership_info.user_id, membership_info.membership_event_id, - membership_info.membership, + membership_info.membership_event_id, membership_info.membership_event_id, ] + list(insert_values) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index cf24d845547..561807a84ef 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -81,7 +81,7 @@ MultiWriterIdGenerator, ) from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import JsonDict, get_domain_from_id +from synapse.types import JsonDict, StrCollection, get_domain_from_id from synapse.types.state import StateFilter from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation @@ -1981,6 +1981,32 @@ async def get_event_ordering(self, event_id: str, room_id: str) -> Tuple[int, in return int(res[0]), int(res[1]) + async def get_sender_for_event_ids( + self, event_ids: StrCollection + ) -> Mapping[str, str]: + """ + Get the sender for a list of event IDs. + Args: + event_ids: The event IDs to look up. + Returns: + A mapping from event ID to event sender. 
+ """ + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=( + "event_id", + "sender", + ), + desc="get_sender_for_event_ids", + ), + ) + + return dict(rows) + async def get_next_event_to_expire(self) -> Optional[Tuple[str, int]]: """Retrieve the entry with the lowest expiry timestamp in the event_expiry table, or None if there's no more event to expire. diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 3b55c528cee..79dc227bc43 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -1164,8 +1164,6 @@ def _txn(txn: LoggingTransaction) -> int: # user_id=user_id, # sender=sender, # membership_event_id=membership_event_id, - # membership=membership, - # membership_event_stream_ordering=membership_event_stream_ordering, # ) # def _backfill_table_txn(txn: LoggingTransaction) -> None: @@ -1190,7 +1188,9 @@ def _txn(txn: LoggingTransaction) -> int: # (room_id, user_id, membership_event_id, membership, event_stream_ordering # {("," + ", ".join(insert_keys)) if insert_keys else ""}) # VALUES ( - # ?, ?, ?, ?, ?, + # ?, ?, ?, + # (SELECT membership FROM room_memberships WHERE event_id = ?), + # (SELECT stream_ordering FROM events WHERE event_id = ?) # {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} # ) # ON CONFLICT (room_id, user_id) @@ -1200,8 +1200,8 @@ def _txn(txn: LoggingTransaction) -> int: # room_id, # user_id, # membership_event_id, - # membership, - # membership_event_stream_ordering, + # membership_event_id, + # membership_event_id, # ] # + list(insert_values), # ) From 0233e20aa3f53eefc21eed6c6c6ef27925c34ce6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 10:44:49 -0500 Subject: [PATCH 068/142] Use full event version after solving the circular import issues --- synapse/storage/controllers/persist_events.py | 59 +- synapse/storage/databases/main/events.py | 44 ++ synapse/storage/databases/state/bg_updates.py | 690 ++++++------------ 3 files changed, 278 insertions(+), 515 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 3054ea43fa4..121d31b5489 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -50,7 +50,7 @@ from twisted.internet import defer -from synapse.api.constants import EventContentFields, EventTypes, Membership +from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME @@ -68,6 +68,7 @@ from synapse.storage.databases.main.events import ( SLIDING_SYNC_RELEVANT_STATE_SET, DeltaState, + PersistEventsStore, SlidingSyncMembershipInfo, SlidingSyncMembershipSnapshotSharedInsertValues, SlidingSyncStateInsertValues, @@ -895,10 +896,8 @@ async def _calculate_sliding_sync_table_changes( } if current_state_map: - state_insert_values = ( - self._get_sliding_sync_insert_values_from_state_map( - current_state_map - ) + state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map ) membership_snapshot_shared_insert_values.update(state_insert_values) # We have current state to work from @@ -964,8 +963,10 @@ async def _calculate_sliding_sync_table_changes( for event in 
remaining_events.values(): current_state_map[(event.type, event.state_key)] = event - joined_room_updates = self._get_sliding_sync_insert_values_from_state_map( - current_state_map + joined_room_updates = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) ) # If something is being deleted from the state, we need to clear it out @@ -987,50 +988,6 @@ async def _calculate_sliding_sync_table_changes( to_delete_membership_snapshots=user_ids_to_delete_membership_snapshots, ) - # TODO: Should we put this next to the other `_get_sliding_sync_*` functions? - @classmethod - def _get_sliding_sync_insert_values_from_state_map( - cls, state_map: StateMap[EventBase] - ) -> SlidingSyncStateInsertValues: - """ - Extract the relevant state values from the `state_map` needed to insert into the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. - - Returns: - Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant - state values needed to insert into - the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. - """ - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: SlidingSyncStateInsertValues = {} - - # Parse the raw event JSON - for state_key, event in state_map.items(): - if state_key == (EventTypes.Create, ""): - room_type = event.content.get(EventContentFields.ROOM_TYPE) - # Scrutinize JSON values - if room_type is None or isinstance(room_type, str): - sliding_sync_insert_map["room_type"] = room_type - elif state_key == (EventTypes.RoomEncryption, ""): - encryption_algorithm = event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_insert_map["is_encrypted"] = is_encrypted - elif state_key == (EventTypes.Name, ""): - room_name = event.content.get(EventContentFields.ROOM_NAME) - # Scrutinize JSON values - if room_name is None or isinstance(room_name, str): - sliding_sync_insert_map["room_name"] = room_name - else: - # We only expect to see events according to the - # `SLIDING_SYNC_RELEVANT_STATE_SET`. - raise AssertionError( - f"Unexpected event (we should not be fetching extra events): {state_key} {event.event_id}" - ) - - return sliding_sync_insert_map - async def _calculate_new_extremities( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9d317048fe9..b87fa315322 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1655,6 +1655,50 @@ def _get_sliding_sync_insert_values_from_state_ids_map_txn( return sliding_sync_insert_map + # TODO: Should we put this next to the other `_get_sliding_sync_*` functions? + @classmethod + def _get_sliding_sync_insert_values_from_state_map( + cls, state_map: StateMap[EventBase] + ) -> SlidingSyncStateInsertValues: + """ + Extract the relevant state values from the `state_map` needed to insert into the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + + Returns: + Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant + state values needed to insert into + the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
+ """ + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_insert_map: SlidingSyncStateInsertValues = {} + + # Parse the raw event JSON + for state_key, event in state_map.items(): + if state_key == (EventTypes.Create, ""): + room_type = event.content.get(EventContentFields.ROOM_TYPE) + # Scrutinize JSON values + if room_type is None or isinstance(room_type, str): + sliding_sync_insert_map["room_type"] = room_type + elif state_key == (EventTypes.RoomEncryption, ""): + encryption_algorithm = event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is_encrypted = encryption_algorithm is not None + sliding_sync_insert_map["is_encrypted"] = is_encrypted + elif state_key == (EventTypes.Name, ""): + room_name = event.content.get(EventContentFields.ROOM_NAME) + # Scrutinize JSON values + if room_name is None or isinstance(room_name, str): + sliding_sync_insert_map["room_name"] = room_name + else: + # We only expect to see events according to the + # `SLIDING_SYNC_RELEVANT_STATE_SET`. + raise AssertionError( + f"Unexpected event (we should not be fetching extra events): {state_key} {event.event_id}" + ) + + return sliding_sync_insert_map + # TODO: Should we put this next to the other `_get_sliding_sync_*` function? @classmethod def _get_sliding_sync_insert_values_from_stripped_state_txn( diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 79dc227bc43..5898a0e6d49 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -20,13 +20,14 @@ # import logging -from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union, cast from typing_extensions import assert_never from synapse.api.constants import Membership +from synapse.events import EventBase from synapse.logging.opentracing import tag_args, trace -from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -35,6 +36,7 @@ from synapse.storage.databases.main.events import ( SLIDING_SYNC_RELEVANT_STATE_SET, PersistEventsStore, + SlidingSyncMembershipInfo, SlidingSyncMembershipSnapshotSharedInsertValues, ) from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine @@ -700,13 +702,16 @@ async def _sliding_sync_membership_snapshots_backfill( "last_event_stream_ordering", -(1 << 31) ) - def _txn(txn: LoggingTransaction) -> int: + def _find_memberships_to_update_txn( + txn: LoggingTransaction, + ) -> List[Tuple[str, str, str, str, str, int, bool]]: # Fetch the set of event IDs that we want to update txn.execute( """ SELECT c.room_id, c.user_id, + e.sender, c.event_id, c.membership, c.event_stream_ordering, @@ -720,165 +725,223 @@ def _txn(txn: LoggingTransaction) -> int: (last_event_stream_ordering, batch_size), ) - memberships_to_update_rows = txn.fetchall() - if not memberships_to_update_rows: - return 0 + memberships_to_update_rows = cast( + List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() + ) - for ( - room_id, - user_id, - membership_event_id, - membership, - membership_event_stream_ordering, - is_outlier, - ) in memberships_to_update_rows: - # We don't know how to handle `membership` values other than these. The - # code below would need to be updated. 
- assert membership in ( - Membership.JOIN, - Membership.INVITE, - Membership.KNOCK, - Membership.LEAVE, - Membership.BAN, - ) + return memberships_to_update_rows - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: ( - SlidingSyncMembershipSnapshotSharedInsertValues - ) = {} - if membership == Membership.JOIN: - # If we're still joined, we can pull from current state - current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id - ) - # We're iterating over rooms that we are joined to so they should - # have `current_state_events` and we should have some current state - # for each room - assert current_state_map + memberships_to_update_rows = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", + _find_memberships_to_update_txn, + ) - state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( - txn, current_state_map - ) - sliding_sync_membership_snapshots_insert_map.update( - state_insert_values - ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_membership_snapshots_insert_map + if not memberships_to_update_rows: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + ) + return 0 - # We have current state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( - True - ) - elif membership in (Membership.INVITE, Membership.KNOCK) or ( - membership == Membership.LEAVE and is_outlier - ): - invite_or_knock_event_id = membership_event_id - invite_or_knock_membership = membership - - # If the event is an `out_of_band_membership` (special case of - # `outlier`), we never had historical state so we have to pull from - # the stripped state on the previous invite/knock event. This gives - # us a consistent view of the room state regardless of your - # membership (i.e. the room shouldn't disappear if your using the - # `is_encrypted` filter and you leave). - if membership == Membership.LEAVE and is_outlier: - # Find the previous invite/knock event before the leave event - txn.execute( - """ - SELECT event_id, membership - FROM room_memberships - WHERE - room_id = ? - AND user_id = ? - AND event_stream_ordering < ? - ORDER BY event_stream_ordering DESC - LIMIT 1 - """, - ( - room_id, - user_id, - membership_event_stream_ordering, - ), - ) - row = txn.fetchone() - # We should see a corresponding previous invite/knock event - assert row is not None - invite_or_knock_event_id, invite_or_knock_membership = row + store = self.hs.get_storage_controllers().main - # Pull from the stripped state on the invite/knock event - txn.execute( - """ - SELECT json FROM event_json - WHERE event_id = ? 
- """, - (invite_or_knock_event_id,), - ) - row = txn.fetchone() - # We should find a corresponding event - assert row is not None - json = row[0] - event_json = db_to_json(json) - - raw_stripped_state_events = None - if invite_or_knock_membership == Membership.INVITE: - invite_room_state = event_json.get("unsigned").get( - "invite_room_state" - ) - raw_stripped_state_events = invite_room_state - elif invite_or_knock_membership == Membership.KNOCK: - knock_room_state = event_json.get("unsigned").get( - "knock_room_state" - ) - raw_stripped_state_events = knock_room_state + def _find_previous_membership_txn( + txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + ) -> Tuple[str, str]: + # Find the previous invite/knock event before the leave event + txn.execute( + """ + SELECT event_id, membership + FROM room_memberships + WHERE + room_id = ? + AND user_id = ? + AND event_stream_ordering < ? + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ( + room_id, + user_id, + stream_ordering, + ), + ) + row = txn.fetchone() - sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn( - txn, raw_stripped_state_events - ) - # We should have some insert values for each room, even if no - # stripped state is on the event because we still want to record - # that we have no known state - assert sliding_sync_membership_snapshots_insert_map - elif membership in (Membership.LEAVE, Membership.BAN): - # Pull from historical state - state_group = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_to_state_groups", - keyvalues={"event_id": membership_event_id}, - retcol="state_group", - allow_none=True, - ) - # We should know the state for the event - assert state_group is not None + # We should see a corresponding previous invite/knock event + assert row is not None + event_id, membership = row - state_by_group = self._get_state_groups_from_groups_txn( - txn, - groups=[state_group], + return event_id, membership + + # Map from (room_id, user_id) to ... + to_insert_membership_snapshots: Dict[ + Tuple[str, str], SlidingSyncMembershipSnapshotSharedInsertValues + ] = {} + to_insert_membership_infos: Dict[Tuple[str, str], SlidingSyncMembershipInfo] = ( + {} + ) + for ( + room_id, + user_id, + sender, + membership_event_id, + membership, + membership_event_stream_ordering, + is_outlier, + ) in memberships_to_update_rows: + # We don't know how to handle `membership` values other than these. The + # code below would need to be updated. + assert membership in ( + Membership.JOIN, + Membership.INVITE, + Membership.KNOCK, + Membership.LEAVE, + Membership.BAN, + ) + + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) = {} + if membership == Membership.JOIN: + # If we're still joined, we can pull from current state. 
+ current_state_ids_map: StateMap[str] = ( + await store.get_partial_filtered_current_state_ids( + room_id, state_filter=StateFilter.from_types( SLIDING_SYNC_RELEVANT_STATE_SET ), ) - state_map = state_by_group[state_group] + ) + # We're iterating over rooms that we are joined to so they should + # have `current_state_events` and we should have some current state + # for each room + assert current_state_ids_map + + fetched_events = await store.get_events(current_state_ids_map.values()) + + current_state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in current_state_ids_map.items() + } - state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( - txn, state_map + state_insert_values = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map ) - sliding_sync_membership_snapshots_insert_map.update( - state_insert_values + ) + sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have current state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + elif membership in (Membership.INVITE, Membership.KNOCK) or ( + membership == Membership.LEAVE and is_outlier + ): + invite_or_knock_event_id = membership_event_id + invite_or_knock_membership = membership + + # If the event is an `out_of_band_membership` (special case of + # `outlier`), we never had historical state so we have to pull from + # the stripped state on the previous invite/knock event. This gives + # us a consistent view of the room state regardless of your + # membership (i.e. the room shouldn't disappear if your using the + # `is_encrypted` filter and you leave). 
+ if membership == Membership.LEAVE and is_outlier: + invite_or_knock_event_id, invite_or_knock_membership = ( + await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._find_previous_membership", + _find_previous_membership_txn, + room_id, + user_id, + membership_event_stream_ordering, + ) ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_membership_snapshots_insert_map - # We have historical state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = ( - True + # Pull from the stripped state on the invite/knock event + invite_or_knock_event = await store.get_event(invite_or_knock_event_id) + + raw_stripped_state_events = None + if invite_or_knock_membership == Membership.INVITE: + invite_room_state = invite_or_knock_event.unsigned.get( + "invite_room_state" + ) + raw_stripped_state_events = invite_room_state + elif invite_or_knock_membership == Membership.KNOCK: + knock_room_state = invite_or_knock_event.unsigned.get( + "knock_room_state" ) - else: - assert_never(membership) + raw_stripped_state_events = knock_room_state + + sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", + PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, + raw_stripped_state_events, + ) + + # We should have some insert values for each room, even if no + # stripped state is on the event because we still want to record + # that we have no known state + assert sliding_sync_membership_snapshots_insert_map + elif membership in (Membership.LEAVE, Membership.BAN): + # Pull from historical state + state_group = await store._get_state_group_for_event( + membership_event_id + ) + # We should know the state for the event + assert state_group is not None + + state_by_group = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._get_state_groups_from_groups_txn", + self._get_state_groups_from_groups_txn, + groups=[state_group], + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + ) + state_ids_map = state_by_group[state_group] + + fetched_events = await store.get_events(state_ids_map.values()) + + state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in state_ids_map.items() + } + + state_insert_values = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + state_map + ) + ) + sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have historical state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + else: + assert_never(membership) + + to_insert_membership_snapshots[(room_id, user_id)] = ( + sliding_sync_membership_snapshots_insert_map + ) + to_insert_membership_infos[(room_id, user_id)] = SlidingSyncMembershipInfo( + user_id=user_id, + sender=sender, + membership_event_id=membership_event_id, + ) + + def _backfill_table_txn(txn: LoggingTransaction) -> None: + for key, insert_map in to_insert_membership_snapshots.items(): + room_id, user_id = key + membership_info = to_insert_membership_infos[key] + membership_event_id = membership_info.membership_event_id # Pulling keys/values separately is safe and will produce congruent # lists - insert_keys = 
sliding_sync_membership_snapshots_insert_map.keys() - insert_values = sliding_sync_membership_snapshots_insert_map.values() + insert_keys = insert_map.keys() + insert_values = insert_map.values() # We don't need to do anything `ON CONFLICT` because we never partially # insert/update the snapshots txn.execute( @@ -887,7 +950,8 @@ def _txn(txn: LoggingTransaction) -> int: (room_id, user_id, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, ?, + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), (SELECT stream_ordering FROM events WHERE event_id = ?) {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) @@ -898,331 +962,29 @@ def _txn(txn: LoggingTransaction) -> int: room_id, user_id, membership_event_id, - membership, + membership_event_id, membership_event_id, ] + list(insert_values), ) - ( - _room_id, - _user_id, - _membership_event_id, - _membership, - membership_event_stream_ordering, - _is_outlier, - ) = memberships_to_update_rows[-1] - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - {"last_event_stream_ordering": membership_event_stream_ordering}, - ) - - return len(memberships_to_update_rows) - - count = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill", _txn + await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill", _backfill_table_txn ) - if not count: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL - ) - - return count + # Update the progress + ( + _room_id, + _user_id, + _sender, + _membership_event_id, + _membership, + membership_event_stream_ordering, + _is_outlier, + ) = memberships_to_update_rows[-1] + await self.db_pool.updates._background_update_progress( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + {"last_event_stream_ordering": membership_event_stream_ordering}, + ) - # async def _sliding_sync_membership_snapshots_backfill( - # self, progress: JsonDict, batch_size: int - # ) -> int: - # """ - # Handles backfilling the `sliding_sync_membership_snapshots` table. - # """ - # last_event_stream_ordering = progress.get( - # "last_event_stream_ordering", -(1 << 31) - # ) - - # def _find_memberships_to_update_txn( - # txn: LoggingTransaction, - # ) -> List[Tuple[str, str, str, str, str, int, bool]]: - # # Fetch the set of event IDs that we want to update - # txn.execute( - # """ - # SELECT - # c.room_id, - # c.user_id, - # e.sender - # c.event_id, - # c.membership, - # c.event_stream_ordering, - # e.outlier - # FROM local_current_membership as c - # INNER JOIN events AS e USING (event_id) - # WHERE event_stream_ordering > ? - # ORDER BY event_stream_ordering ASC - # LIMIT ? 
- # """, - # (last_event_stream_ordering, batch_size), - # ) - - # memberships_to_update_rows = cast( - # List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() - # ) - - # return memberships_to_update_rows - - # memberships_to_update_rows = await self.db_pool.runInteraction( - # "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", - # _find_memberships_to_update_txn, - # ) - - # if not memberships_to_update_rows: - # await self.db_pool.updates._end_background_update( - # _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL - # ) - - # store = self.hs.get_storage_controllers().main - - # def _find_previous_membership_txn( - # txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int - # ) -> Tuple[str, str]: - # # Find the previous invite/knock event before the leave event - # txn.execute( - # """ - # SELECT event_id, membership - # FROM room_memberships - # WHERE - # room_id = ? - # AND user_id = ? - # AND event_stream_ordering < ? - # ORDER BY event_stream_ordering DESC - # LIMIT 1 - # """, - # ( - # room_id, - # user_id, - # stream_ordering, - # ), - # ) - # row = txn.fetchone() - - # # We should see a corresponding previous invite/knock event - # assert row is not None - # event_id, membership = row - - # return event_id, membership - - # # Map from (room_id, user_id) to ... - # to_insert_membership_snapshots: Dict[ - # Tuple[str, str], SlidingSyncMembershipSnapshotSharedInsertValues - # ] = {} - # to_insert_membership_infos: Dict[Tuple[str, str], SlidingSyncMembershipInfo] = ( - # {} - # ) - # for ( - # room_id, - # user_id, - # sender, - # membership_event_id, - # membership, - # membership_event_stream_ordering, - # is_outlier, - # ) in memberships_to_update_rows: - # # We don't know how to handle `membership` values other than these. The - # # code below would need to be updated. - # assert membership in ( - # Membership.JOIN, - # Membership.INVITE, - # Membership.KNOCK, - # Membership.LEAVE, - # Membership.BAN, - # ) - - # # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - # sliding_sync_membership_snapshots_insert_map: ( - # SlidingSyncMembershipSnapshotSharedInsertValues - # ) = {} - # if membership == Membership.JOIN: - # # If we're still joined, we can pull from current state. 
- # current_state_ids_map: StateMap[str] = ( - # await store.get_partial_filtered_current_state_ids( - # room_id, - # state_filter=StateFilter.from_types( - # SLIDING_SYNC_RELEVANT_STATE_SET - # ), - # ) - # ) - # # We're iterating over rooms that we are joined to so they should - # # have `current_state_events` and we should have some current state - # # for each room - # assert current_state_ids_map - - # fetched_events = await store.get_events(current_state_ids_map.values()) - - # current_state_map: StateMap[EventBase] = { - # state_key: fetched_events[event_id] - # for state_key, event_id in current_state_ids_map.items() - # } - - # state_insert_values = EventsPersistenceStorageController._get_sliding_sync_insert_values_from_state_map( - # current_state_map - # ) - # sliding_sync_membership_snapshots_insert_map.update(state_insert_values) - # # We should have some insert values for each room, even if they are `None` - # assert sliding_sync_membership_snapshots_insert_map - - # # We have current state to work from - # sliding_sync_membership_snapshots_insert_map["has_known_state"] = True - # elif membership in (Membership.INVITE, Membership.KNOCK) or ( - # membership == Membership.LEAVE and is_outlier - # ): - # invite_or_knock_event_id = membership_event_id - # invite_or_knock_membership = membership - - # # If the event is an `out_of_band_membership` (special case of - # # `outlier`), we never had historical state so we have to pull from - # # the stripped state on the previous invite/knock event. This gives - # # us a consistent view of the room state regardless of your - # # membership (i.e. the room shouldn't disappear if your using the - # # `is_encrypted` filter and you leave). - # if membership == Membership.LEAVE and is_outlier: - # invite_or_knock_event_id, invite_or_knock_membership = ( - # await self.db_pool.runInteraction( - # "sliding_sync_membership_snapshots_backfill._find_previous_membership", - # _find_previous_membership_txn, - # room_id, - # user_id, - # membership_event_stream_ordering, - # ) - # ) - - # # Pull from the stripped state on the invite/knock event - # invite_or_knock_event = await store.get_event(invite_or_knock_event_id) - - # raw_stripped_state_events = None - # if invite_or_knock_membership == Membership.INVITE: - # invite_room_state = invite_or_knock_event.unsigned.get( - # "invite_room_state" - # ) - # raw_stripped_state_events = invite_room_state - # elif invite_or_knock_membership == Membership.KNOCK: - # knock_room_state = invite_or_knock_event.unsigned.get( - # "knock_room_state" - # ) - # raw_stripped_state_events = knock_room_state - - # sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( - # "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", - # PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, - # raw_stripped_state_events, - # ) - - # # We should have some insert values for each room, even if no - # # stripped state is on the event because we still want to record - # # that we have no known state - # assert sliding_sync_membership_snapshots_insert_map - # elif membership in (Membership.LEAVE, Membership.BAN): - # # Pull from historical state - # state_group = await store._get_state_group_for_event( - # membership_event_id - # ) - # # We should know the state for the event - # assert state_group is not None - - # state_by_group = await self.db_pool.runInteraction( - # 
"sliding_sync_membership_snapshots_backfill._get_state_groups_from_groups_txn", - # self._get_state_groups_from_groups_txn, - # groups=[state_group], - # state_filter=StateFilter.from_types( - # SLIDING_SYNC_RELEVANT_STATE_SET - # ), - # ) - # state_ids_map = state_by_group[state_group] - - # fetched_events = await store.get_events(state_ids_map.values()) - - # state_map: StateMap[EventBase] = { - # state_key: fetched_events[event_id] - # for state_key, event_id in state_ids_map.items() - # } - - # state_insert_values = EventsPersistenceStorageController._get_sliding_sync_insert_values_from_state_map( - # state_map - # ) - # sliding_sync_membership_snapshots_insert_map.update(state_insert_values) - # # We should have some insert values for each room, even if they are `None` - # assert sliding_sync_membership_snapshots_insert_map - - # # We have historical state to work from - # sliding_sync_membership_snapshots_insert_map["has_known_state"] = True - # else: - # assert_never(membership) - - # to_insert_membership_snapshots[(room_id, user_id)] = ( - # sliding_sync_membership_snapshots_insert_map - # ) - # to_insert_membership_infos[(room_id, user_id)] = SlidingSyncMembershipInfo( - # user_id=user_id, - # sender=sender, - # membership_event_id=membership_event_id, - # ) - - # def _backfill_table_txn(txn: LoggingTransaction) -> None: - # for key, insert_map in to_insert_membership_snapshots.items(): - # room_id, user_id = key - # membership_info = to_insert_membership_infos[key] - # membership_event_id = membership_info.membership_event_id - # membership = membership_info.membership - # membership_event_stream_ordering = ( - # membership_info.membership_event_stream_ordering - # ) - - # # Pulling keys/values separately is safe and will produce congruent - # # lists - # insert_keys = insert_map.keys() - # insert_values = insert_map.values() - # # We don't need to do anything `ON CONFLICT` because we never partially - # # insert/update the snapshots - # txn.execute( - # f""" - # INSERT INTO sliding_sync_membership_snapshots - # (room_id, user_id, membership_event_id, membership, event_stream_ordering - # {("," + ", ".join(insert_keys)) if insert_keys else ""}) - # VALUES ( - # ?, ?, ?, - # (SELECT membership FROM room_memberships WHERE event_id = ?), - # (SELECT stream_ordering FROM events WHERE event_id = ?) - # {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} - # ) - # ON CONFLICT (room_id, user_id) - # DO NOTHING - # """, - # [ - # room_id, - # user_id, - # membership_event_id, - # membership_event_id, - # membership_event_id, - # ] - # + list(insert_values), - # ) - - # await self.db_pool.runInteraction( - # "sliding_sync_membership_snapshots_backfill", _backfill_table_txn - # ) - - # # Update the progress - # ( - # _room_id, - # _user_id, - # _sender, - # _membership_event_id, - # _membership, - # membership_event_stream_ordering, - # _is_outlier, - # ) = memberships_to_update_rows[-1] - # await self.db_pool.updates._background_update_progress( - # _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - # {"last_event_stream_ordering": membership_event_stream_ordering}, - # ) - - # return len(memberships_to_update_rows) + return len(memberships_to_update_rows) From a5e06c6a8df04c3efc6ce78adce9bb73d02454ef Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 11:14:15 -0500 Subject: [PATCH 069/142] Move back to the main store --- .../databases/main/events_bg_updates.py | 476 +++++++++++++++++- synapse/storage/databases/state/bg_updates.py | 472 +---------------- tests/storage/test_events.py | 2 +- 3 files changed, 475 insertions(+), 475 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 64d303e3307..8e12645d6b3 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -24,9 +24,9 @@ import attr -from synapse.api.constants import EventContentFields, RelationTypes +from synapse.api.constants import EventContentFields, Membership, RelationTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.events import make_event_from_dict +from synapse.events import EventBase, make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -34,9 +34,18 @@ LoggingTransaction, make_tuple_comparison_clause, ) -from synapse.storage.databases.main.events import PersistEventsStore +from synapse.storage.databases.main.events import ( + SLIDING_SYNC_RELEVANT_STATE_SET, + PersistEventsStore, + SlidingSyncMembershipInfo, + SlidingSyncMembershipSnapshotSharedInsertValues, +) +from synapse.storage.databases.main.events_worker import EventsWorkerStore +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor -from synapse.types import JsonDict, StrCollection +from synapse.types import JsonDict, StateMap, StrCollection +from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES +from synapse.types.state import StateFilter if TYPE_CHECKING: from synapse.server import HomeServer @@ -78,6 +87,11 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" + SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" + SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( + "sliding_sync_membership_snapshots_backfill" + ) + @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -97,7 +111,7 @@ class _CalculateChainCover: finished_room_map: Dict[str, Tuple[int, int]] -class EventsBackgroundUpdatesStore(SQLBaseStore): +class EventsBackgroundUpdatesStore(EventsWorkerStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ -279,6 +293,16 @@ def __init__( where_clause="NOT outlier", ) + # Backfill the sliding sync 
tables + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + self._sliding_sync_joined_rooms_backfill, + ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + self._sliding_sync_membership_snapshots_backfill, + ) + async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -1073,7 +1097,7 @@ def _calculate_chain_cover_txn( PersistEventsStore._add_chain_cover_index( txn, self.db_pool, - self.event_chain_id_gen, # type: ignore[attr-defined] + self.event_chain_id_gen, event_to_room_id, event_to_types, cast(Dict[str, StrCollection], event_to_auth_chain), @@ -1516,3 +1540,443 @@ def _populate_txn(txn: LoggingTransaction) -> bool: ) return batch_size + + async def _sliding_sync_joined_rooms_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_joined_rooms` table. + """ + last_room_id = progress.get("last_room_id", "") + + def make_sql_clause_for_get_last_event_pos_in_room( + database_engine: BaseDatabaseEngine, + event_types: Optional[StrCollection] = None, + ) -> Tuple[str, list]: + """ + Returns the ID and event position of the last event in a room at or before a + stream ordering. + + Based on `get_last_event_pos_in_room_before_stream_ordering(...)` + + Args: + database_engine + event_types: Optional allowlist of event types to filter by + + Returns: + A tuple of SQL query and the args + """ + event_type_clause = "" + event_type_args: List[str] = [] + if event_types is not None and len(event_types) > 0: + event_type_clause, event_type_args = make_in_list_sql_clause( + database_engine, "type", event_types + ) + event_type_clause = f"AND {event_type_clause}" + + sql = f""" + SELECT stream_ordering + FROM events + LEFT JOIN rejections USING (event_id) + WHERE room_id = ? + {event_type_clause} + AND NOT outlier + AND rejections.event_id IS NULL + ORDER BY stream_ordering DESC + LIMIT 1 + """ + + return sql, event_type_args + + def _txn(txn: LoggingTransaction) -> int: + # Fetch the set of room IDs that we want to update + txn.execute( + """ + SELECT DISTINCT room_id FROM current_state_events + WHERE room_id > ? + ORDER BY room_id ASC + LIMIT ? 
+ """, + (last_room_id, batch_size), + ) + + rooms_to_update_rows = txn.fetchall() + if not rooms_to_update_rows: + return 0 + + for (room_id,) in rooms_to_update_rows: + # TODO: Handle redactions + current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( + txn, room_id + ) + # We're iterating over rooms pulled from the current_state_events table + # so we should have some current state for each room + assert current_state_map + + sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( + txn, current_state_map + ) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_joined_rooms_insert_map + + ( + most_recent_event_stream_ordering_clause, + most_recent_event_stream_ordering_args, + ) = make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, event_types=None + ) + bump_stamp_clause, bump_stamp_args = ( + make_sql_clause_for_get_last_event_pos_in_room( + txn.database_engine, + event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + ) + ) + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = sliding_sync_joined_rooms_insert_map.keys() + insert_values = sliding_sync_joined_rooms_insert_map.values() + + sql = f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) + VALUES ( + ?, + ({most_recent_event_stream_ordering_clause}), + ({bump_stamp_clause}), + {", ".join("?" for _ in insert_values)} + ) + ON CONFLICT (room_id) + DO UPDATE SET + event_stream_ordering = EXCLUDED.event_stream_ordering, + bump_stamp = EXCLUDED.bump_stamp, + {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + """ + args = ( + [room_id, room_id] + + most_recent_event_stream_ordering_args + + [room_id] + + bump_stamp_args + + list(insert_values) + ) + txn.execute(sql, args) + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + {"last_room_id": rooms_to_update_rows[-1][0]}, + ) + + return len(rooms_to_update_rows) + + count = await self.db_pool.runInteraction( + "sliding_sync_joined_rooms_backfill", _txn + ) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL + ) + + return count + + async def _sliding_sync_membership_snapshots_backfill( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Handles backfilling the `sliding_sync_membership_snapshots` table. + """ + last_event_stream_ordering = progress.get( + "last_event_stream_ordering", -(1 << 31) + ) + + def _find_memberships_to_update_txn( + txn: LoggingTransaction, + ) -> List[Tuple[str, str, str, str, str, int, bool]]: + # Fetch the set of event IDs that we want to update + txn.execute( + """ + SELECT + c.room_id, + c.user_id, + e.sender, + c.event_id, + c.membership, + c.event_stream_ordering, + e.outlier + FROM local_current_membership as c + INNER JOIN events AS e USING (event_id) + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering ASC + LIMIT ? 
+ """, + (last_event_stream_ordering, batch_size), + ) + + memberships_to_update_rows = cast( + List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() + ) + + return memberships_to_update_rows + + memberships_to_update_rows = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", + _find_memberships_to_update_txn, + ) + + if not memberships_to_update_rows: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + ) + return 0 + + def _find_previous_membership_txn( + txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + ) -> Tuple[str, str]: + # Find the previous invite/knock event before the leave event + txn.execute( + """ + SELECT event_id, membership + FROM room_memberships + WHERE + room_id = ? + AND user_id = ? + AND event_stream_ordering < ? + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ( + room_id, + user_id, + stream_ordering, + ), + ) + row = txn.fetchone() + + # We should see a corresponding previous invite/knock event + assert row is not None + event_id, membership = row + + return event_id, membership + + # Map from (room_id, user_id) to ... + to_insert_membership_snapshots: Dict[ + Tuple[str, str], SlidingSyncMembershipSnapshotSharedInsertValues + ] = {} + to_insert_membership_infos: Dict[Tuple[str, str], SlidingSyncMembershipInfo] = ( + {} + ) + for ( + room_id, + user_id, + sender, + membership_event_id, + membership, + membership_event_stream_ordering, + is_outlier, + ) in memberships_to_update_rows: + # We don't know how to handle `membership` values other than these. The + # code below would need to be updated. + assert membership in ( + Membership.JOIN, + Membership.INVITE, + Membership.KNOCK, + Membership.LEAVE, + Membership.BAN, + ) + + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_insert_map: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) = {} + if membership == Membership.JOIN: + # If we're still joined, we can pull from current state. 
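+                #
+                # As a rough illustration (event IDs are made up), the map we pull
+                # here looks something like:
+                #
+                #   {
+                #       ("m.room.create", ""): "$create_event_id",
+                #       ("m.room.encryption", ""): "$encryption_event_id",
+                #       ("m.room.name", ""): "$name_event_id",
+                #   }
+                #
+                # i.e. a `StateMap[str]` keyed by `(event_type, state_key)` and
+                # limited to `SLIDING_SYNC_RELEVANT_STATE_SET`.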
+ current_state_ids_map: StateMap[ + str + ] = await self.hs.get_storage_controllers().state.get_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + # Partially-stated rooms should have all state events except for + # remote membership events so we don't need to wait at all because + # we only want some non-membership state + await_full_state=False, + ) + # We're iterating over rooms that we are joined to so they should + # have `current_state_events` and we should have some current state + # for each room + assert current_state_ids_map + + fetched_events = await self.get_events(current_state_ids_map.values()) + + current_state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in current_state_ids_map.items() + } + + state_insert_values = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) + ) + sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have current state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + elif membership in (Membership.INVITE, Membership.KNOCK) or ( + membership == Membership.LEAVE and is_outlier + ): + invite_or_knock_event_id = membership_event_id + invite_or_knock_membership = membership + + # If the event is an `out_of_band_membership` (special case of + # `outlier`), we never had historical state so we have to pull from + # the stripped state on the previous invite/knock event. This gives + # us a consistent view of the room state regardless of your + # membership (i.e. the room shouldn't disappear if your using the + # `is_encrypted` filter and you leave). 
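+                #
+                # As a sketch (field values are illustrative), the stripped state we
+                # fall back to is a list of minimal event dicts on the membership event:
+                #
+                #   invite_or_knock_event.unsigned["invite_room_state"] == [
+                #       {"type": "m.room.create", "state_key": "", "content": {...}},
+                #       {"type": "m.room.encryption", "state_key": "",
+                #        "content": {"algorithm": "m.megolm.v1.aes-sha2"}},
+                #   ]
+                #
+                # assuming the remote server attached stripped state to the
+                # invite/knock in the first place.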
+ if membership == Membership.LEAVE and is_outlier: + invite_or_knock_event_id, invite_or_knock_membership = ( + await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._find_previous_membership", + _find_previous_membership_txn, + room_id, + user_id, + membership_event_stream_ordering, + ) + ) + + # Pull from the stripped state on the invite/knock event + invite_or_knock_event = await self.get_event(invite_or_knock_event_id) + + raw_stripped_state_events = None + if invite_or_knock_membership == Membership.INVITE: + invite_room_state = invite_or_knock_event.unsigned.get( + "invite_room_state" + ) + raw_stripped_state_events = invite_room_state + elif invite_or_knock_membership == Membership.KNOCK: + knock_room_state = invite_or_knock_event.unsigned.get( + "knock_room_state" + ) + raw_stripped_state_events = knock_room_state + + sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", + PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, + raw_stripped_state_events, + ) + + # We should have some insert values for each room, even if no + # stripped state is on the event because we still want to record + # that we have no known state + assert sliding_sync_membership_snapshots_insert_map + elif membership in (Membership.LEAVE, Membership.BAN): + # Pull from historical state + state_ids_map = await self.hs.get_storage_controllers().state.get_state_ids_for_event( + membership_event_id, + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + # Partially-stated rooms should have all state events except for + # remote membership events so we don't need to wait at all because + # we only want some non-membership state + await_full_state=False, + ) + + fetched_events = await self.get_events(state_ids_map.values()) + + state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in state_ids_map.items() + } + + state_insert_values = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + state_map + ) + ) + sliding_sync_membership_snapshots_insert_map.update(state_insert_values) + # We should have some insert values for each room, even if they are `None` + assert sliding_sync_membership_snapshots_insert_map + + # We have historical state to work from + sliding_sync_membership_snapshots_insert_map["has_known_state"] = True + else: + # We don't know how to handle this type of membership yet + # + # FIXME: We should use `assert_never` here but for some reason + # the exhaustive matching doesn't recognize the `Never` here. 
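+                # (For reference: `assert_never(x)` from `typing_extensions` only
+                # type-checks cleanly if every `Membership` variant was handled
+                # above so that `x` narrows to `Never`; at runtime it simply raises.)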
+ # assert_never(membership) + raise AssertionError( + f"Unexpected membership {membership} ({membership_event_id}) that we don't know how to handle yet" + ) + + to_insert_membership_snapshots[(room_id, user_id)] = ( + sliding_sync_membership_snapshots_insert_map + ) + to_insert_membership_infos[(room_id, user_id)] = SlidingSyncMembershipInfo( + user_id=user_id, + sender=sender, + membership_event_id=membership_event_id, + ) + + def _backfill_table_txn(txn: LoggingTransaction) -> None: + for key, insert_map in to_insert_membership_snapshots.items(): + room_id, user_id = key + membership_info = to_insert_membership_infos[key] + membership_event_id = membership_info.membership_event_id + + # Pulling keys/values separately is safe and will produce congruent + # lists + insert_keys = insert_map.keys() + insert_values = insert_map.values() + # We don't need to do anything `ON CONFLICT` because we never partially + # insert/update the snapshots + txn.execute( + f""" + INSERT INTO sliding_sync_membership_snapshots + (room_id, user_id, membership_event_id, membership, event_stream_ordering + {("," + ", ".join(insert_keys)) if insert_keys else ""}) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} + ) + ON CONFLICT (room_id, user_id) + DO NOTHING + """, + [ + room_id, + user_id, + membership_event_id, + membership_event_id, + membership_event_id, + ] + + list(insert_values), + ) + + await self.db_pool.runInteraction( + "sliding_sync_membership_snapshots_backfill", _backfill_table_txn + ) + + # Update the progress + ( + _room_id, + _user_id, + _sender, + _membership_event_id, + _membership, + membership_event_stream_ordering, + _is_outlier, + ) = memberships_to_update_rows[-1] + await self.db_pool.updates._background_update_progress( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + {"last_event_stream_ordering": membership_event_stream_ordering}, + ) + + return len(memberships_to_update_rows) diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 5898a0e6d49..ea7d8199a7d 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -20,28 +20,17 @@ # import logging -from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union -from typing_extensions import assert_never - -from synapse.api.constants import Membership -from synapse.events import EventBase from synapse.logging.opentracing import tag_args, trace -from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause +from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events import ( - SLIDING_SYNC_RELEVANT_STATE_SET, - PersistEventsStore, - SlidingSyncMembershipInfo, - SlidingSyncMembershipSnapshotSharedInsertValues, -) -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine -from synapse.types import JsonDict, MutableStateMap, StateMap, StrCollection -from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES +from synapse.storage.engines import PostgresEngine +from synapse.types import MutableStateMap, StateMap from synapse.types.state import StateFilter from synapse.util.caches 
import intern_string @@ -54,13 +43,6 @@ MAX_STATE_DELTA_HOPS = 100 -class _BackgroundUpdates: - SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" - SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( - "sliding_sync_membership_snapshots_backfill" - ) - - class StateGroupBackgroundUpdateStore(SQLBaseStore): """Defines functions related to state groups needed to run the state background updates. @@ -367,16 +349,6 @@ def __init__( columns=["event_stream_ordering"], ) - # Backfill the sliding sync tables - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - self._sliding_sync_joined_rooms_backfill, - ) - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - self._sliding_sync_membership_snapshots_backfill, - ) - async def _background_deduplicate_state( self, progress: dict, batch_size: int ) -> int: @@ -552,439 +524,3 @@ def reindex_txn(conn: LoggingDatabaseConnection) -> None: ) return 1 - - async def _sliding_sync_joined_rooms_backfill( - self, progress: JsonDict, batch_size: int - ) -> int: - """ - Handles backfilling the `sliding_sync_joined_rooms` table. - """ - last_room_id = progress.get("last_room_id", "") - - def make_sql_clause_for_get_last_event_pos_in_room( - database_engine: BaseDatabaseEngine, - event_types: Optional[StrCollection] = None, - ) -> Tuple[str, list]: - """ - Returns the ID and event position of the last event in a room at or before a - stream ordering. - - Based on `get_last_event_pos_in_room_before_stream_ordering(...)` - - Args: - database_engine - event_types: Optional allowlist of event types to filter by - - Returns: - A tuple of SQL query and the args - """ - event_type_clause = "" - event_type_args: List[str] = [] - if event_types is not None and len(event_types) > 0: - event_type_clause, event_type_args = make_in_list_sql_clause( - database_engine, "type", event_types - ) - event_type_clause = f"AND {event_type_clause}" - - sql = f""" - SELECT stream_ordering - FROM events - LEFT JOIN rejections USING (event_id) - WHERE room_id = ? - {event_type_clause} - AND NOT outlier - AND rejections.event_id IS NULL - ORDER BY stream_ordering DESC - LIMIT 1 - """ - - return sql, event_type_args - - def _txn(txn: LoggingTransaction) -> int: - # Fetch the set of room IDs that we want to update - txn.execute( - """ - SELECT DISTINCT room_id FROM current_state_events - WHERE room_id > ? - ORDER BY room_id ASC - LIMIT ? 
- """, - (last_room_id, batch_size), - ) - - rooms_to_update_rows = txn.fetchall() - if not rooms_to_update_rows: - return 0 - - for (room_id,) in rooms_to_update_rows: - # TODO: Handle redactions - current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id - ) - # We're iterating over rooms pulled from the current_state_events table - # so we should have some current state for each room - assert current_state_map - - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( - txn, current_state_map - ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_joined_rooms_insert_map - - ( - most_recent_event_stream_ordering_clause, - most_recent_event_stream_ordering_args, - ) = make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, event_types=None - ) - bump_stamp_clause, bump_stamp_args = ( - make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, - event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, - ) - ) - - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = sliding_sync_joined_rooms_insert_map.keys() - insert_values = sliding_sync_joined_rooms_insert_map.values() - - sql = f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) - VALUES ( - ?, - ({most_recent_event_stream_ordering_clause}), - ({bump_stamp_clause}), - {", ".join("?" for _ in insert_values)} - ) - ON CONFLICT (room_id) - DO UPDATE SET - event_stream_ordering = EXCLUDED.event_stream_ordering, - bump_stamp = EXCLUDED.bump_stamp, - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} - """ - args = ( - [room_id, room_id] - + most_recent_event_stream_ordering_args - + [room_id] - + bump_stamp_args - + list(insert_values) - ) - txn.execute(sql, args) - - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - {"last_room_id": rooms_to_update_rows[-1][0]}, - ) - - return len(rooms_to_update_rows) - - count = await self.db_pool.runInteraction( - "sliding_sync_joined_rooms_backfill", _txn - ) - - if not count: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL - ) - - return count - - async def _sliding_sync_membership_snapshots_backfill( - self, progress: JsonDict, batch_size: int - ) -> int: - """ - Handles backfilling the `sliding_sync_membership_snapshots` table. - """ - last_event_stream_ordering = progress.get( - "last_event_stream_ordering", -(1 << 31) - ) - - def _find_memberships_to_update_txn( - txn: LoggingTransaction, - ) -> List[Tuple[str, str, str, str, str, int, bool]]: - # Fetch the set of event IDs that we want to update - txn.execute( - """ - SELECT - c.room_id, - c.user_id, - e.sender, - c.event_id, - c.membership, - c.event_stream_ordering, - e.outlier - FROM local_current_membership as c - INNER JOIN events AS e USING (event_id) - WHERE event_stream_ordering > ? - ORDER BY event_stream_ordering ASC - LIMIT ? 
- """, - (last_event_stream_ordering, batch_size), - ) - - memberships_to_update_rows = cast( - List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() - ) - - return memberships_to_update_rows - - memberships_to_update_rows = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", - _find_memberships_to_update_txn, - ) - - if not memberships_to_update_rows: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL - ) - return 0 - - store = self.hs.get_storage_controllers().main - - def _find_previous_membership_txn( - txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int - ) -> Tuple[str, str]: - # Find the previous invite/knock event before the leave event - txn.execute( - """ - SELECT event_id, membership - FROM room_memberships - WHERE - room_id = ? - AND user_id = ? - AND event_stream_ordering < ? - ORDER BY event_stream_ordering DESC - LIMIT 1 - """, - ( - room_id, - user_id, - stream_ordering, - ), - ) - row = txn.fetchone() - - # We should see a corresponding previous invite/knock event - assert row is not None - event_id, membership = row - - return event_id, membership - - # Map from (room_id, user_id) to ... - to_insert_membership_snapshots: Dict[ - Tuple[str, str], SlidingSyncMembershipSnapshotSharedInsertValues - ] = {} - to_insert_membership_infos: Dict[Tuple[str, str], SlidingSyncMembershipInfo] = ( - {} - ) - for ( - room_id, - user_id, - sender, - membership_event_id, - membership, - membership_event_stream_ordering, - is_outlier, - ) in memberships_to_update_rows: - # We don't know how to handle `membership` values other than these. The - # code below would need to be updated. - assert membership in ( - Membership.JOIN, - Membership.INVITE, - Membership.KNOCK, - Membership.LEAVE, - Membership.BAN, - ) - - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_insert_map: ( - SlidingSyncMembershipSnapshotSharedInsertValues - ) = {} - if membership == Membership.JOIN: - # If we're still joined, we can pull from current state. 
- current_state_ids_map: StateMap[str] = ( - await store.get_partial_filtered_current_state_ids( - room_id, - state_filter=StateFilter.from_types( - SLIDING_SYNC_RELEVANT_STATE_SET - ), - ) - ) - # We're iterating over rooms that we are joined to so they should - # have `current_state_events` and we should have some current state - # for each room - assert current_state_ids_map - - fetched_events = await store.get_events(current_state_ids_map.values()) - - current_state_map: StateMap[EventBase] = { - state_key: fetched_events[event_id] - for state_key, event_id in current_state_ids_map.items() - } - - state_insert_values = ( - PersistEventsStore._get_sliding_sync_insert_values_from_state_map( - current_state_map - ) - ) - sliding_sync_membership_snapshots_insert_map.update(state_insert_values) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_membership_snapshots_insert_map - - # We have current state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = True - elif membership in (Membership.INVITE, Membership.KNOCK) or ( - membership == Membership.LEAVE and is_outlier - ): - invite_or_knock_event_id = membership_event_id - invite_or_knock_membership = membership - - # If the event is an `out_of_band_membership` (special case of - # `outlier`), we never had historical state so we have to pull from - # the stripped state on the previous invite/knock event. This gives - # us a consistent view of the room state regardless of your - # membership (i.e. the room shouldn't disappear if your using the - # `is_encrypted` filter and you leave). - if membership == Membership.LEAVE and is_outlier: - invite_or_knock_event_id, invite_or_knock_membership = ( - await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._find_previous_membership", - _find_previous_membership_txn, - room_id, - user_id, - membership_event_stream_ordering, - ) - ) - - # Pull from the stripped state on the invite/knock event - invite_or_knock_event = await store.get_event(invite_or_knock_event_id) - - raw_stripped_state_events = None - if invite_or_knock_membership == Membership.INVITE: - invite_room_state = invite_or_knock_event.unsigned.get( - "invite_room_state" - ) - raw_stripped_state_events = invite_room_state - elif invite_or_knock_membership == Membership.KNOCK: - knock_room_state = invite_or_knock_event.unsigned.get( - "knock_room_state" - ) - raw_stripped_state_events = knock_room_state - - sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", - PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, - raw_stripped_state_events, - ) - - # We should have some insert values for each room, even if no - # stripped state is on the event because we still want to record - # that we have no known state - assert sliding_sync_membership_snapshots_insert_map - elif membership in (Membership.LEAVE, Membership.BAN): - # Pull from historical state - state_group = await store._get_state_group_for_event( - membership_event_id - ) - # We should know the state for the event - assert state_group is not None - - state_by_group = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._get_state_groups_from_groups_txn", - self._get_state_groups_from_groups_txn, - groups=[state_group], - state_filter=StateFilter.from_types( - SLIDING_SYNC_RELEVANT_STATE_SET - ), - ) - 
state_ids_map = state_by_group[state_group] - - fetched_events = await store.get_events(state_ids_map.values()) - - state_map: StateMap[EventBase] = { - state_key: fetched_events[event_id] - for state_key, event_id in state_ids_map.items() - } - - state_insert_values = ( - PersistEventsStore._get_sliding_sync_insert_values_from_state_map( - state_map - ) - ) - sliding_sync_membership_snapshots_insert_map.update(state_insert_values) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_membership_snapshots_insert_map - - # We have historical state to work from - sliding_sync_membership_snapshots_insert_map["has_known_state"] = True - else: - assert_never(membership) - - to_insert_membership_snapshots[(room_id, user_id)] = ( - sliding_sync_membership_snapshots_insert_map - ) - to_insert_membership_infos[(room_id, user_id)] = SlidingSyncMembershipInfo( - user_id=user_id, - sender=sender, - membership_event_id=membership_event_id, - ) - - def _backfill_table_txn(txn: LoggingTransaction) -> None: - for key, insert_map in to_insert_membership_snapshots.items(): - room_id, user_id = key - membership_info = to_insert_membership_infos[key] - membership_event_id = membership_info.membership_event_id - - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = insert_map.keys() - insert_values = insert_map.values() - # We don't need to do anything `ON CONFLICT` because we never partially - # insert/update the snapshots - txn.execute( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) - {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO NOTHING - """, - [ - room_id, - user_id, - membership_event_id, - membership_event_id, - membership_event_id, - ] - + list(insert_values), - ) - - await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill", _backfill_table_txn - ) - - # Update the progress - ( - _room_id, - _user_id, - _sender, - _membership_event_id, - _membership, - membership_event_stream_ordering, - _is_outlier, - ) = memberships_to_update_rows[-1] - await self.db_pool.updates._background_update_progress( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - {"last_event_stream_ordering": membership_event_stream_ordering}, - ) - - return len(memberships_to_update_rows) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 3d3f95f29ca..a2122de7ee7 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -36,7 +36,7 @@ from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.events import DeltaState -from synapse.storage.databases.state.bg_updates import _BackgroundUpdates +from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.types import StateMap from synapse.util import Clock From d3f90e4bd8a89673c71663681fe8130e4352276c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 14:39:54 -0500 Subject: [PATCH 070/142] Get full events for `_sliding_sync_joined_rooms_backfill` --- synapse/storage/databases/main/events.py | 95 ++------ .../databases/main/events_bg_updates.py | 229 ++++++++++-------- .../storage/databases/main/state_deltas.py | 93 ++++--- synapse/storage/databases/main/stream.py | 61 +++++ 4 files changed, 283 insertions(+), 195 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b87fa315322..481aabe7cde 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1549,20 +1549,24 @@ def _update_current_state_txn( txn, {m for m in members_to_cache_bust if not self.hs.is_mine_id(m)} ) - # TODO: We can probably remove this function in favor of other stuff. - # TODO: This doesn't take into account redactions @classmethod def _get_relevant_sliding_sync_current_state_event_ids_txn( cls, txn: LoggingTransaction, room_id: str - ) -> MutableStateMap[str]: + ) -> Tuple[MutableStateMap[str], int]: """ Fetch the current state event IDs for the relevant (to the `sliding_sync_joined_rooms` table) state types for the given room. Returns: - StateMap of event IDs necessary to to fetch the relevant state values needed - to insert into the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots`. + A tuple of: + 1. StateMap of event IDs necessary to to fetch the relevant state values + needed to insert into the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots`. + 2. The corresponding latest `stream_id` in the + `current_state_delta_stream` table. This is useful to compare against + the `current_state_delta_stream` table later so you can check whether + the current state has changed since you last fetched the current + state. 
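+
+            For example (illustrative values only), the return value might look
+            something like `({("m.room.create", ""): "$create_event_id"}, 1234)`.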
""" # Fetch the current state event IDs from the database ( @@ -1587,75 +1591,25 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( (event_type, state_key): event_id for event_id, event_type, state_key in txn } - return current_state_map - - # TODO: We can probably remove this function in favor of other stuff. - # TODO: Should we put this next to the other `_get_sliding_sync_*` function? - @classmethod - def _get_sliding_sync_insert_values_from_state_ids_map_txn( - cls, txn: LoggingTransaction, state_map: StateMap[str] - ) -> SlidingSyncStateInsertValues: - """ - Fetch events in the `state_map` and extract the relevant state values needed to - insert into the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` - tables. - - Returns: - Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant - state values needed to insert into - the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. - """ - # Map of values to insert/update in the `sliding_sync_membership_snapshots` table - sliding_sync_insert_map: SlidingSyncStateInsertValues = {} - # Fetch the raw event JSON from the database - ( - event_id_in_list_clause, - event_id_args, - ) = make_in_list_sql_clause( - txn.database_engine, - "event_id", - state_map.values(), - ) txn.execute( - f""" - SELECT type, state_key, json FROM event_json - INNER JOIN events USING (event_id) - WHERE {event_id_in_list_clause} + """ + SELECT stream_id + FROM current_state_delta_stream + WHERE + room_id = ? + ORDER BY stream_id DESC + LIMIT 1 """, - event_id_args, + (room_id,), ) + row = txn.fetchone() + # If we're able to fetch the `current_state_events` above, we should have rows + # in `current_state_delta_stream` as well. + assert row, "Failed to fetch the `last_current_state_delta_stream_id`" + last_current_state_delta_stream_id = row[0] - # Parse the raw event JSON - for row in txn: - event_type, state_key, json = row - event_json = db_to_json(json) - - if event_type == EventTypes.Create: - room_type = event_json.get("content", {}).get( - EventContentFields.ROOM_TYPE - ) - sliding_sync_insert_map["room_type"] = room_type - elif event_type == EventTypes.RoomEncryption: - encryption_algorithm = event_json.get("content", {}).get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is_encrypted = encryption_algorithm is not None - sliding_sync_insert_map["is_encrypted"] = is_encrypted - elif event_type == EventTypes.Name: - room_name = event_json.get("content", {}).get( - EventContentFields.ROOM_NAME - ) - sliding_sync_insert_map["room_name"] = room_name - else: - # We only expect to see events according to the - # `SLIDING_SYNC_RELEVANT_STATE_SET`. - raise AssertionError( - f"Unexpected event (we should not be fetching extra events): ({event_type}, {state_key})" - ) - - return sliding_sync_insert_map + return current_state_map, last_current_state_delta_stream_id - # TODO: Should we put this next to the other `_get_sliding_sync_*` functions? @classmethod def _get_sliding_sync_insert_values_from_state_map( cls, state_map: StateMap[EventBase] @@ -1699,7 +1653,6 @@ def _get_sliding_sync_insert_values_from_state_map( return sliding_sync_insert_map - # TODO: Should we put this next to the other `_get_sliding_sync_*` function? 
@classmethod def _get_sliding_sync_insert_values_from_stripped_state_txn( cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 8e12645d6b3..b56325a4a2c 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -39,11 +39,12 @@ PersistEventsStore, SlidingSyncMembershipInfo, SlidingSyncMembershipSnapshotSharedInsertValues, + SlidingSyncStateInsertValues, ) -from synapse.storage.databases.main.events_worker import EventsWorkerStore -from synapse.storage.engines import BaseDatabaseEngine +from synapse.storage.databases.main.state_deltas import StateDeltasStore +from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.types import Cursor -from synapse.types import JsonDict, StateMap, StrCollection +from synapse.types import JsonDict, RoomStreamToken, StateMap, StrCollection from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.types.state import StateFilter @@ -111,7 +112,7 @@ class _CalculateChainCover: finished_room_map: Dict[str, Tuple[int, int]] -class EventsBackgroundUpdatesStore(EventsWorkerStore, SQLBaseStore): +class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ -1549,46 +1550,7 @@ async def _sliding_sync_joined_rooms_backfill( """ last_room_id = progress.get("last_room_id", "") - def make_sql_clause_for_get_last_event_pos_in_room( - database_engine: BaseDatabaseEngine, - event_types: Optional[StrCollection] = None, - ) -> Tuple[str, list]: - """ - Returns the ID and event position of the last event in a room at or before a - stream ordering. - - Based on `get_last_event_pos_in_room_before_stream_ordering(...)` - - Args: - database_engine - event_types: Optional allowlist of event types to filter by - - Returns: - A tuple of SQL query and the args - """ - event_type_clause = "" - event_type_args: List[str] = [] - if event_types is not None and len(event_types) > 0: - event_type_clause, event_type_args = make_in_list_sql_clause( - database_engine, "type", event_types - ) - event_type_clause = f"AND {event_type_clause}" - - sql = f""" - SELECT stream_ordering - FROM events - LEFT JOIN rejections USING (event_id) - WHERE room_id = ? 
- {event_type_clause} - AND NOT outlier - AND rejections.event_id IS NULL - ORDER BY stream_ordering DESC - LIMIT 1 - """ - - return sql, event_type_args - - def _txn(txn: LoggingTransaction) -> int: + def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: # Fetch the set of room IDs that we want to update txn.execute( """ @@ -1600,85 +1562,162 @@ def _txn(txn: LoggingTransaction) -> int: (last_room_id, batch_size), ) - rooms_to_update_rows = txn.fetchall() - if not rooms_to_update_rows: - return 0 + rooms_to_update_rows = cast(List[Tuple[str]], txn.fetchall()) + + return [row[0] for row in rooms_to_update_rows] + + rooms_to_update = await self.db_pool.runInteraction( + "_sliding_sync_joined_rooms_backfill._get_rooms_to_update_txn", + _get_rooms_to_update_txn, + ) - for (room_id,) in rooms_to_update_rows: - # TODO: Handle redactions - current_state_map = PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn( - txn, room_id + if not rooms_to_update: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL + ) + return 0 + + # Map from room_id to insert/update state values in the `sliding_sync_joined_rooms` table + joined_room_updates: Dict[str, SlidingSyncStateInsertValues] = {} + # Map from room_id to stream_ordering/bump_stamp/last_current_state_delta_stream_id values + joined_room_stream_ordering_updates: Dict[str, Tuple[int, int, int]] = {} + for room_id in rooms_to_update: + current_state_ids_map, last_current_state_delta_stream_id = ( + await self.db_pool.runInteraction( + "_sliding_sync_joined_rooms_backfill._get_relevant_sliding_sync_current_state_event_ids_txn", + PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn, + room_id, ) - # We're iterating over rooms pulled from the current_state_events table - # so we should have some current state for each room - assert current_state_map + ) + # We're iterating over rooms pulled from the current_state_events table + # so we should have some current state for each room + assert current_state_ids_map - sliding_sync_joined_rooms_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_state_ids_map_txn( - txn, current_state_map + fetched_events = await self.get_events(current_state_ids_map.values()) + + current_state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in current_state_ids_map.items() + } + + state_insert_values = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map ) - # We should have some insert values for each room, even if they are `None` - assert sliding_sync_joined_rooms_insert_map + ) + # We should have some insert values for each room, even if they are `None` + assert state_insert_values + joined_room_updates[room_id] = state_insert_values + # Figure out the stream_ordering of the latest event in the room + most_recent_event_pos_results = await self.get_last_event_pos_in_room( + room_id, event_types=None + ) + assert ( + most_recent_event_pos_results + ), "We should not be seeing `None` here because the room should at-least have a create event" + # Figure out the latest bump_stamp in the room + bump_stamp_event_pos_results = await self.get_last_event_pos_in_room( + room_id, event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES + ) + assert bump_stamp_event_pos_results, ( + "We should not be seeing `None` here because the room should at-least have a create event " + + "(unless `SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES` no longer 
includes the room create event)" + ) + joined_room_stream_ordering_updates[room_id] = ( + most_recent_event_pos_results[1].stream, + bump_stamp_event_pos_results[1].stream, + last_current_state_delta_stream_id, + ) + + def _backfill_table_txn(txn: LoggingTransaction) -> None: + last_successful_room_id: Optional[str] = None + for room_id, insert_map in joined_room_updates.items(): ( - most_recent_event_stream_ordering_clause, - most_recent_event_stream_ordering_args, - ) = make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, event_types=None - ) - bump_stamp_clause, bump_stamp_args = ( - make_sql_clause_for_get_last_event_pos_in_room( - txn.database_engine, - event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + event_stream_ordering, + bump_stamp, + last_current_state_delta_stream_id, + ) = joined_room_stream_ordering_updates[room_id] + + # Check if the current state has been updated since we gathered it + state_deltas_since_we_gathered_current_state = ( + self.get_current_state_deltas_for_room_txn( + txn, + room_id, + from_token=RoomStreamToken( + stream=last_current_state_delta_stream_id + ), + to_token=None, ) ) + for state_delta in state_deltas_since_we_gathered_current_state: + # We only need to check if the state is relevant to the + # `sliding_sync_joined_rooms` table. + if ( + state_delta.event_type, + state_delta.state_key, + ) in SLIDING_SYNC_RELEVANT_STATE_SET: + # Save our progress before we exit early + if last_successful_room_id is not None: + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + {"last_room_id": room_id}, + ) + # Raising exception so we can just exit and try again + raise Exception( + "Current state was updated after we gathered it to update " + + "`sliding_sync_joined_rooms` in the background update. " + + "Raising exception so we can just try again." + ) # Pulling keys/values separately is safe and will produce congruent # lists - insert_keys = sliding_sync_joined_rooms_insert_map.keys() - insert_values = sliding_sync_joined_rooms_insert_map.values() - + insert_keys = insert_map.keys() + insert_values = insert_map.values() + # Since we partially update the `sliding_sync_joined_rooms` as new state + # is sent, we need to update the fields `ON CONFLICT`. We just have to be careful + # we're not overwriting it with stale data. sql = f""" INSERT INTO sliding_sync_joined_rooms (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) VALUES ( - ?, - ({most_recent_event_stream_ordering_clause}), - ({bump_stamp_clause}), + ?, ?, ?, {", ".join("?" 
for _ in insert_values)} ) ON CONFLICT (room_id) DO UPDATE SET - event_stream_ordering = EXCLUDED.event_stream_ordering, - bump_stamp = EXCLUDED.bump_stamp, + event_stream_ordering = CASE + WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering + THEN EXCLUDED.event_stream_ordering + ELSE event_stream_ordering + END, + bump_stamp = CASE + WHEN bump_stamp IS NULL OR bump_stamp < EXCLUDED.bump_stamp + THEN EXCLUDED.bump_stamp + ELSE bump_stamp + END, {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} """ - args = ( - [room_id, room_id] - + most_recent_event_stream_ordering_args - + [room_id] - + bump_stamp_args - + list(insert_values) + args = [room_id, event_stream_ordering, bump_stamp] + list( + insert_values ) txn.execute(sql, args) - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - {"last_room_id": rooms_to_update_rows[-1][0]}, - ) - - return len(rooms_to_update_rows) + # Keep track of the last successful room_id + last_successful_room_id = room_id - count = await self.db_pool.runInteraction( - "sliding_sync_joined_rooms_backfill", _txn + await self.db_pool.runInteraction( + "sliding_sync_joined_rooms_backfill", _backfill_table_txn ) - if not count: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL - ) + # Update the progress + await self.db_pool.updates._background_update_progress( + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + {"last_room_id": rooms_to_update[-1]}, + ) - return count + return len(rooms_to_update) async def _sliding_sync_membership_snapshots_backfill( self, progress: JsonDict, batch_size: int diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index eaa13da368f..ba52fff652f 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -161,45 +161,80 @@ async def get_max_stream_id_in_current_state_deltas(self) -> int: self._get_max_stream_id_in_current_state_deltas_txn, ) - @trace - async def get_current_state_deltas_for_room( - self, room_id: str, from_token: RoomStreamToken, to_token: RoomStreamToken + def get_current_state_deltas_for_room_txn( + self, + txn: LoggingTransaction, + room_id: str, + *, + from_token: Optional[RoomStreamToken], + to_token: Optional[RoomStreamToken], ) -> List[StateDelta]: - """Get the state deltas between two tokens.""" - - if not self._curr_state_delta_stream_cache.has_entity_changed( - room_id, from_token.stream - ): - return [] + """ + Get the state deltas between two tokens. - def get_current_state_deltas_for_room_txn( - txn: LoggingTransaction, - ) -> List[StateDelta]: - sql = """ + (> `from_token` and <= `to_token`) + """ + from_clause = "" + from_args = [] + if from_token is not None: + from_clause = "AND ? < stream_id" + from_args = [from_token.stream] + + to_clause = "" + to_args = [] + if to_token is not None: + to_clause = "AND stream_id <= ?" + to_args = [to_token.get_max_stream_pos()] + + sql = f""" SELECT instance_name, stream_id, type, state_key, event_id, prev_event_id FROM current_state_delta_stream - WHERE room_id = ? AND ? < stream_id AND stream_id <= ? + WHERE room_id = ? 
{from_clause} {to_clause} ORDER BY stream_id ASC """ - txn.execute( - sql, (room_id, from_token.stream, to_token.get_max_stream_pos()) + txn.execute(sql, [room_id] + from_args + to_args) + + return [ + StateDelta( + stream_id=row[1], + room_id=room_id, + event_type=row[2], + state_key=row[3], + event_id=row[4], + prev_event_id=row[5], ) + for row in txn + if _filter_results_by_stream(from_token, to_token, row[0], row[1]) + ] - return [ - StateDelta( - stream_id=row[1], - room_id=room_id, - event_type=row[2], - state_key=row[3], - event_id=row[4], - prev_event_id=row[5], - ) - for row in txn - if _filter_results_by_stream(from_token, to_token, row[0], row[1]) - ] + @trace + async def get_current_state_deltas_for_room( + self, + room_id: str, + *, + from_token: Optional[RoomStreamToken], + to_token: Optional[RoomStreamToken], + ) -> List[StateDelta]: + """ + Get the state deltas between two tokens. + + (> `from_token` and <= `to_token`) + """ + + if ( + from_token is not None + and not self._curr_state_delta_stream_cache.has_entity_changed( + room_id, from_token.stream + ) + ): + return [] return await self.db_pool.runInteraction( - "get_current_state_deltas_for_room", get_current_state_deltas_for_room_txn + "get_current_state_deltas_for_room", + self.get_current_state_deltas_for_room_txn, + room_id, + from_token=from_token, + to_token=to_token, ) @trace diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 3054174717b..395735cd43c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1263,6 +1263,67 @@ async def get_last_event_id_in_room_before_stream_ordering( return None + async def get_last_event_pos_in_room( + self, + room_id: str, + event_types: Optional[StrCollection] = None, + ) -> Optional[Tuple[str, PersistedEventPosition]]: + """ + Returns the ID and event position of the last event in a room. + + Based on `get_last_event_pos_in_room_before_stream_ordering(...)` + + Args: + room_id + event_types: Optional allowlist of event types to filter by + + Returns: + The ID of the most recent event and it's position, or None if there are no + events in the room that match the given event types. + """ + + def _get_last_event_pos_in_room_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, PersistedEventPosition]]: + event_type_clause = "" + event_type_args: List[str] = [] + if event_types is not None and len(event_types) > 0: + event_type_clause, event_type_args = make_in_list_sql_clause( + txn.database_engine, "type", event_types + ) + event_type_clause = f"AND {event_type_clause}" + + sql = f""" + SELECT event_id, stream_ordering, instance_name + FROM events + LEFT JOIN rejections USING (event_id) + WHERE room_id = ? 
+ {event_type_clause} + AND NOT outlier + AND rejections.event_id IS NULL + ORDER BY stream_ordering DESC + LIMIT 1 + """ + + txn.execute( + sql, + [room_id] + event_type_args, + ) + + row = cast(Tuple[str, int, str], txn.fetchone()) + event_id, stream_ordering, instance_name = row + + return event_id, PersistedEventPosition( + # If instance_name is null we default to "master" + instance_name or "master", + stream_ordering, + ) + + return await self.db_pool.runInteraction( + "get_last_event_pos_in_room", + _get_last_event_pos_in_room_txn, + ) + @trace async def get_last_event_pos_in_room_before_stream_ordering( self, From cda92af4a6f6b3bab1bd51feb406c09bc27fb4bc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 14:46:50 -0500 Subject: [PATCH 071/142] No need to update `event_stream_ordering`/`bump_stamp` `ON CONFLICT` --- .../databases/main/events_bg_updates.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index b56325a4a2c..2be7c6df3b5 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1664,7 +1664,11 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, {"last_room_id": room_id}, ) - # Raising exception so we can just exit and try again + # Raising exception so we can just exit and try again. It would + # be hard to resolve this within the transaction because we need + # to get full events out that take redactions into account. We + # could add some retry logic here, but it's easier to just let + # the background update try again. raise Exception( "Current state was updated after we gathered it to update " + "`sliding_sync_joined_rooms` in the background update. " @@ -1676,8 +1680,13 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: insert_keys = insert_map.keys() insert_values = insert_map.values() # Since we partially update the `sliding_sync_joined_rooms` as new state - # is sent, we need to update the fields `ON CONFLICT`. We just have to be careful - # we're not overwriting it with stale data. + # is sent, we need to update the state fields `ON CONFLICT`. We just + # have to be careful we're not overwriting it with stale data (see + # `last_current_state_delta_stream_id` check above). + # + # We don't need to update `event_stream_ordering` and `bump_stamp` `ON + # CONFLICT` because if they are present, that means they are already + # up-to-date. 
sql = f""" INSERT INTO sliding_sync_joined_rooms (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) @@ -1687,16 +1696,6 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: ) ON CONFLICT (room_id) DO UPDATE SET - event_stream_ordering = CASE - WHEN event_stream_ordering IS NULL OR event_stream_ordering < EXCLUDED.event_stream_ordering - THEN EXCLUDED.event_stream_ordering - ELSE event_stream_ordering - END, - bump_stamp = CASE - WHEN bump_stamp IS NULL OR bump_stamp < EXCLUDED.bump_stamp - THEN EXCLUDED.bump_stamp - ELSE bump_stamp - END, {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} """ args = [room_id, event_stream_ordering, bump_stamp] + list( From 772c501bb6c1d1fb2a2fc85025495a6268bdd75a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 15:04:51 -0500 Subject: [PATCH 072/142] Use available `stream_id` See https://github.com/element-hq/synapse/pull/17512#discussion_r1725310035 --- synapse/storage/databases/main/events.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 481aabe7cde..85c8bbef8d1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1383,17 +1383,7 @@ def _update_current_state_txn( if to_insert: args: List[Any] = [ room_id, - # Even though `Mapping`/`Dict` have no guaranteed order, some - # implementations may preserve insertion order so we're just - # going to choose the best possible answer by using the "first" - # event ID which we will assume will have the greatest - # `stream_ordering`. We really just need *some* answer in case - # we are the first ones inserting into the table because of the - # `NON NULL` constraint on `event_stream_ordering`. In reality, - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` - # is run after this function to update it to the correct latest - # value. - next(iter(to_insert.values())), + stream_id, ] args.extend(iter(insert_values)) @@ -1409,8 +1399,7 @@ def _update_current_state_txn( INSERT INTO sliding_sync_joined_rooms (room_id, event_stream_ordering, {", ".join(insert_keys)}) VALUES ( - ?, - (SELECT stream_ordering FROM events WHERE event_id = ?), + ?, ?, {", ".join("?" 
for _ in insert_values)} ) ON CONFLICT (room_id) From f6d7ffd9c53e025d61ae2a6826600b65e8ba8c19 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 16:10:14 -0500 Subject: [PATCH 073/142] Move `_calculate_sliding_sync_table_changes(...)` after we assign `stream_ordering` to events See https://github.com/element-hq/synapse/pull/17512#discussion_r1725728637 --- synapse/storage/controllers/persist_events.py | 240 +----------------- synapse/storage/databases/main/events.py | 223 +++++++++++++++- tests/storage/test_events.py | 36 +-- 3 files changed, 237 insertions(+), 262 deletions(-) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 121d31b5489..ac0919340b1 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -38,7 +38,6 @@ Iterable, List, Optional, - Sequence, Set, Tuple, TypeVar, @@ -65,18 +64,9 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases -from synapse.storage.databases.main.events import ( - SLIDING_SYNC_RELEVANT_STATE_SET, - DeltaState, - PersistEventsStore, - SlidingSyncMembershipInfo, - SlidingSyncMembershipSnapshotSharedInsertValues, - SlidingSyncStateInsertValues, - SlidingSyncTableChanges, -) +from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( - MutableStateMap, PersistedEventPosition, RoomStreamToken, StateMap, @@ -512,8 +502,10 @@ async def _update_current_state( """ state = await self._calculate_current_state(room_id) delta = await self._calculate_state_delta(room_id, state) - sliding_sync_table_changes = await self._calculate_sliding_sync_table_changes( - room_id, [], delta + sliding_sync_table_changes = ( + await self.persist_events_store._calculate_sliding_sync_table_changes( + room_id, [], delta + ) ) await self.persist_events_store.update_current_state( @@ -619,7 +611,6 @@ async def _persist_event_batch( new_forward_extremities = None state_delta_for_room = None - sliding_sync_table_changes = None if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): @@ -633,14 +624,6 @@ async def _persist_event_batch( room_id, chunk ) - if state_delta_for_room is not None: - with Measure(self._clock, "_calculate_sliding_sync_table_changes"): - sliding_sync_table_changes = ( - await self._calculate_sliding_sync_table_changes( - room_id, chunk, state_delta_for_room - ) - ) - with Measure(self._clock, "calculate_chain_cover_index_for_events"): # We now calculate chain ID/sequence numbers for any state events we're # persisting. 
We ignore out of band memberships as we're not in the room @@ -660,7 +643,6 @@ async def _persist_event_batch( use_negative_stream_ordering=backfilled, inhibit_local_membership_updates=backfilled, new_event_links=new_event_links, - sliding_sync_table_changes=sliding_sync_table_changes, ) return replaced_events @@ -776,218 +758,6 @@ async def _calculate_new_forward_extremities_and_state_delta( return (new_forward_extremities, delta) - async def _calculate_sliding_sync_table_changes( - self, - room_id: str, - events_and_contexts: Sequence[Tuple[EventBase, EventContext]], - delta_state: DeltaState, - ) -> SlidingSyncTableChanges: - """ - Calculate the changes to the `sliding_sync_membership_snapshots` and - `sliding_sync_joined_rooms` tables given the deltas that are going to be used to - update the `current_state_events` table. - - Just a bunch of pre-processing so we so we don't need to spend time in the - transaction itself gathering all of this info. It's also easier to deal with - redactions outside of a transaction. - - Args: - room_id: The room ID currently being processed. - events_and_contexts: List of tuples of (event, context) being persisted. - This is completely optional (you can pass an empty list) and will just - save us from fetching the events from the database if we already have - them. - delta_state: Deltas that are going to be used to update the - `current_state_events` table. - """ - to_insert = delta_state.to_insert - to_delete = delta_state.to_delete - - event_map = {event.event_id: event for event, _ in events_and_contexts} - - # Handle gathering info for the `sliding_sync_membership_snapshots` table - # - # This would only happen if someone was state reset out of the room - user_ids_to_delete_membership_snapshots = [ - state_key - for event_type, state_key in to_delete - if event_type == EventTypes.Member and self.is_mine_id(state_key) - ] - - membership_snapshot_shared_insert_values: ( - SlidingSyncMembershipSnapshotSharedInsertValues - ) = {} - membership_infos_to_insert_membership_snapshots: List[ - SlidingSyncMembershipInfo - ] = [] - if to_insert: - membership_event_id_to_user_id_map: Dict[str, str] = {} - for state_key, event_id in to_insert.items(): - if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): - membership_event_id_to_user_id_map[event_id] = state_key[1] - - event_id_to_sender_map: Dict[str, str] = {} - # In normal event persist scenarios, we should be able to find the - # membership events in the `events_and_contexts` given to us but it's - # possible a state reset happened which added us to the room without a - # corresponding new membership event (reset back to a previous membership). - missing_membership_event_ids: Set[str] = set() - for membership_event_id in membership_event_id_to_user_id_map.keys(): - membership_event = event_map.get(membership_event_id) - if membership_event: - event_id_to_sender_map[membership_event_id] = ( - membership_event.sender - ) - else: - missing_membership_event_ids.add(membership_event_id) - - # Otherwise, we need to find a couple events that we were reset to. 
- if missing_membership_event_ids: - remaining_event_id_to_sender_map = ( - await self.main_store.get_sender_for_event_ids( - missing_membership_event_ids - ) - ) - # There shouldn't be any missing events - assert ( - remaining_event_id_to_sender_map.keys() - == missing_membership_event_ids - ), missing_membership_event_ids.difference( - remaining_event_id_to_sender_map.keys() - ) - event_id_to_sender_map.update(remaining_event_id_to_sender_map) - - membership_infos_to_insert_membership_snapshots = [ - SlidingSyncMembershipInfo( - user_id=user_id, - sender=event_id_to_sender_map[membership_event_id], - membership_event_id=membership_event_id, - ) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ] - - if membership_infos_to_insert_membership_snapshots: - current_state_ids_map: MutableStateMap[str] = dict( - await self.main_store.get_partial_filtered_current_state_ids( - room_id, - state_filter=StateFilter.from_types( - SLIDING_SYNC_RELEVANT_STATE_SET - ), - ) - ) - # Since we fetched the current state before we took `to_insert`/`to_delete` - # into account, we need to do a couple fixups. - # - # Update the current_state_map with what we have `to_delete` - for state_key in to_delete: - current_state_ids_map.pop(state_key, None) - # Update the current_state_map with what we have `to_insert` - for state_key, event_id in to_insert.items(): - if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: - current_state_ids_map[state_key] = event_id - - fetched_events = await self.main_store.get_events( - current_state_ids_map.values() - ) - - current_state_map: StateMap[EventBase] = { - state_key: fetched_events[event_id] - for state_key, event_id in current_state_ids_map.items() - } - - if current_state_map: - state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_map( - current_state_map - ) - membership_snapshot_shared_insert_values.update(state_insert_values) - # We have current state to work from - membership_snapshot_shared_insert_values["has_known_state"] = True - else: - # We don't have any `current_state_events` anymore (previously - # cleared out because of `no_longer_in_room`). This can happen if - # one user is joined and another is invited (some non-join - # membership). If the joined user leaves, we are `no_longer_in_room` - # and `current_state_events` is cleared out. When the invited user - # rejects the invite (leaves the room), we will end up here. - # - # In these cases, we should inherit the meta data from the previous - # snapshot so we shouldn't update any of the state values. When - # using sliding sync filters, this will prevent the room from - # disappearing/appearing just because you left the room. - # - # Ideally, we could additionally assert that we're only here for - # valid non-join membership transitions. - assert delta_state.no_longer_in_room - - # Handle gathering info for the `sliding_sync_joined_rooms` table - # - # We only deal with - # updating the state related columns. The - # `event_stream_ordering`/`bump_stamp` are updated elsewhere in the event - # persisting stack (see - # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) - # - joined_room_updates: SlidingSyncStateInsertValues = {} - if not delta_state.no_longer_in_room: - # Look through the items we're going to insert into the current state to see - # if there is anything that we care about and should also update in the - # `sliding_sync_joined_rooms` table. 
- current_state_ids_map = {} - for state_key, event_id in to_insert.items(): - if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: - current_state_ids_map[state_key] = event_id - - # Get the full event objects for the current state events - # - # In normal event persist scenarios, we should be able to find the state - # events in the `events_and_contexts` given to us but it's possible a state - # reset happened which that reset back to a previous state. - current_state_map = {} - missing_event_ids: Set[str] = set() - for state_key, event_id in current_state_ids_map.items(): - event = event_map.get(event_id) - if event: - current_state_map[state_key] = event - else: - missing_event_ids.add(event_id) - - # Otherwise, we need to find a couple events that we were reset to. - if missing_event_ids: - remaining_events = await self.main_store.get_events( - current_state_ids_map.values() - ) - # There shouldn't be any missing events - assert ( - remaining_events.keys() == missing_event_ids - ), missing_event_ids.difference(remaining_events.keys()) - for event in remaining_events.values(): - current_state_map[(event.type, event.state_key)] = event - - joined_room_updates = ( - PersistEventsStore._get_sliding_sync_insert_values_from_state_map( - current_state_map - ) - ) - - # If something is being deleted from the state, we need to clear it out - for state_key in to_delete: - if state_key == (EventTypes.Create, ""): - joined_room_updates["room_type"] = None - elif state_key == (EventTypes.RoomEncryption, ""): - joined_room_updates["is_encrypted"] = False - elif state_key == (EventTypes.Name, ""): - joined_room_updates["room_name"] = None - - return SlidingSyncTableChanges( - room_id=room_id, - # For `sliding_sync_joined_rooms` - joined_room_updates=joined_room_updates, - # For `sliding_sync_membership_snapshots` - membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, - to_insert_membership_snapshots=membership_infos_to_insert_membership_snapshots, - to_delete_membership_snapshots=user_ids_to_delete_membership_snapshots, - ) - async def _calculate_new_extremities( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 85c8bbef8d1..fe93e12eeba 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -32,6 +32,7 @@ Iterable, List, Optional, + Sequence, Set, Tuple, cast, @@ -75,6 +76,7 @@ get_domain_from_id, ) from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES +from synapse.types.state import StateFilter from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.stringutils import non_null_str_or_none @@ -245,7 +247,6 @@ async def _persist_events_and_state_updates( new_event_links: Dict[str, NewEventChainLinks], use_negative_stream_ordering: bool = False, inhibit_local_membership_updates: bool = False, - sliding_sync_table_changes: Optional[SlidingSyncTableChanges], ) -> None: """Persist a set of events alongside updates to the current state and forward extremities tables. 
@@ -306,6 +307,14 @@ async def _persist_events_and_state_updates( event.internal_metadata.stream_ordering = stream event.internal_metadata.instance_name = self._instance_name + sliding_sync_table_changes = None + if state_delta_for_room is not None: + sliding_sync_table_changes = ( + await self._calculate_sliding_sync_table_changes( + room_id, events_and_contexts, state_delta_for_room + ) + ) + await self.db_pool.runInteraction( "persist_events", self._persist_events_txn, @@ -342,6 +351,218 @@ async def _persist_events_and_state_updates( (room_id,), frozenset(new_forward_extremities) ) + async def _calculate_sliding_sync_table_changes( + self, + room_id: str, + events_and_contexts: Sequence[Tuple[EventBase, EventContext]], + delta_state: DeltaState, + ) -> SlidingSyncTableChanges: + """ + Calculate the changes to the `sliding_sync_membership_snapshots` and + `sliding_sync_joined_rooms` tables given the deltas that are going to be used to + update the `current_state_events` table. + + Just a bunch of pre-processing so we so we don't need to spend time in the + transaction itself gathering all of this info. It's also easier to deal with + redactions outside of a transaction. + + Args: + room_id: The room ID currently being processed. + events_and_contexts: List of tuples of (event, context) being persisted. + This is completely optional (you can pass an empty list) and will just + save us from fetching the events from the database if we already have + them. + delta_state: Deltas that are going to be used to update the + `current_state_events` table. + """ + to_insert = delta_state.to_insert + to_delete = delta_state.to_delete + + event_map = {event.event_id: event for event, _ in events_and_contexts} + + # Handle gathering info for the `sliding_sync_membership_snapshots` table + # + # This would only happen if someone was state reset out of the room + user_ids_to_delete_membership_snapshots = [ + state_key + for event_type, state_key in to_delete + if event_type == EventTypes.Member and self.is_mine_id(state_key) + ] + + membership_snapshot_shared_insert_values: ( + SlidingSyncMembershipSnapshotSharedInsertValues + ) = {} + membership_infos_to_insert_membership_snapshots: List[ + SlidingSyncMembershipInfo + ] = [] + if to_insert: + membership_event_id_to_user_id_map: Dict[str, str] = {} + for state_key, event_id in to_insert.items(): + if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): + membership_event_id_to_user_id_map[event_id] = state_key[1] + + event_id_to_sender_map: Dict[str, str] = {} + # In normal event persist scenarios, we should be able to find the + # membership events in the `events_and_contexts` given to us but it's + # possible a state reset happened which added us to the room without a + # corresponding new membership event (reset back to a previous membership). + missing_membership_event_ids: Set[str] = set() + for membership_event_id in membership_event_id_to_user_id_map.keys(): + membership_event = event_map.get(membership_event_id) + if membership_event: + event_id_to_sender_map[membership_event_id] = ( + membership_event.sender + ) + else: + missing_membership_event_ids.add(membership_event_id) + + # Otherwise, we need to find a couple events that we were reset to. 
+ if missing_membership_event_ids: + remaining_event_id_to_sender_map = ( + await self.store.get_sender_for_event_ids( + missing_membership_event_ids + ) + ) + # There shouldn't be any missing events + assert ( + remaining_event_id_to_sender_map.keys() + == missing_membership_event_ids + ), missing_membership_event_ids.difference( + remaining_event_id_to_sender_map.keys() + ) + event_id_to_sender_map.update(remaining_event_id_to_sender_map) + + membership_infos_to_insert_membership_snapshots = [ + SlidingSyncMembershipInfo( + user_id=user_id, + sender=event_id_to_sender_map[membership_event_id], + membership_event_id=membership_event_id, + ) + for membership_event_id, user_id in membership_event_id_to_user_id_map.items() + ] + + if membership_infos_to_insert_membership_snapshots: + current_state_ids_map: MutableStateMap[str] = dict( + await self.store.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + ) + ) + # Since we fetched the current state before we took `to_insert`/`to_delete` + # into account, we need to do a couple fixups. + # + # Update the current_state_map with what we have `to_delete` + for state_key in to_delete: + current_state_ids_map.pop(state_key, None) + # Update the current_state_map with what we have `to_insert` + for state_key, event_id in to_insert.items(): + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: + current_state_ids_map[state_key] = event_id + + fetched_events = await self.store.get_events( + current_state_ids_map.values() + ) + + current_state_map: StateMap[EventBase] = { + state_key: fetched_events[event_id] + for state_key, event_id in current_state_ids_map.items() + } + + if current_state_map: + state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) + membership_snapshot_shared_insert_values.update(state_insert_values) + # We have current state to work from + membership_snapshot_shared_insert_values["has_known_state"] = True + else: + # We don't have any `current_state_events` anymore (previously + # cleared out because of `no_longer_in_room`). This can happen if + # one user is joined and another is invited (some non-join + # membership). If the joined user leaves, we are `no_longer_in_room` + # and `current_state_events` is cleared out. When the invited user + # rejects the invite (leaves the room), we will end up here. + # + # In these cases, we should inherit the meta data from the previous + # snapshot so we shouldn't update any of the state values. When + # using sliding sync filters, this will prevent the room from + # disappearing/appearing just because you left the room. + # + # Ideally, we could additionally assert that we're only here for + # valid non-join membership transitions. + assert delta_state.no_longer_in_room + + # Handle gathering info for the `sliding_sync_joined_rooms` table + # + # We only deal with + # updating the state related columns. The + # `event_stream_ordering`/`bump_stamp` are updated elsewhere in the event + # persisting stack (see + # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) + # + joined_room_updates: SlidingSyncStateInsertValues = {} + if not delta_state.no_longer_in_room: + # Look through the items we're going to insert into the current state to see + # if there is anything that we care about and should also update in the + # `sliding_sync_joined_rooms` table. 
+ current_state_ids_map = {} + for state_key, event_id in to_insert.items(): + if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: + current_state_ids_map[state_key] = event_id + + # Get the full event objects for the current state events + # + # In normal event persist scenarios, we should be able to find the state + # events in the `events_and_contexts` given to us but it's possible a state + # reset happened which that reset back to a previous state. + current_state_map = {} + missing_event_ids: Set[str] = set() + for state_key, event_id in current_state_ids_map.items(): + event = event_map.get(event_id) + if event: + current_state_map[state_key] = event + else: + missing_event_ids.add(event_id) + + # Otherwise, we need to find a couple events that we were reset to. + if missing_event_ids: + remaining_events = await self.store.get_events( + current_state_ids_map.values() + ) + # There shouldn't be any missing events + assert ( + remaining_events.keys() == missing_event_ids + ), missing_event_ids.difference(remaining_events.keys()) + for event in remaining_events.values(): + current_state_map[(event.type, event.state_key)] = event + + joined_room_updates = ( + PersistEventsStore._get_sliding_sync_insert_values_from_state_map( + current_state_map + ) + ) + + # If something is being deleted from the state, we need to clear it out + for state_key in to_delete: + if state_key == (EventTypes.Create, ""): + joined_room_updates["room_type"] = None + elif state_key == (EventTypes.RoomEncryption, ""): + joined_room_updates["is_encrypted"] = False + elif state_key == (EventTypes.Name, ""): + joined_room_updates["room_name"] = None + + return SlidingSyncTableChanges( + room_id=room_id, + # For `sliding_sync_joined_rooms` + joined_room_updates=joined_room_updates, + # For `sliding_sync_membership_snapshots` + membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, + to_insert_membership_snapshots=membership_infos_to_insert_membership_snapshots, + to_delete_membership_snapshots=user_ids_to_delete_membership_snapshots, + ) + async def calculate_chain_cover_index_for_events( self, room_id: str, events: Collection[EventBase] ) -> Dict[str, NewEventChainLinks]: diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index a2122de7ee7..9e5c0e2bf81 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -1432,27 +1432,19 @@ def test_joined_room_meta_state_reset(self) -> None: ) ) event_chunk = [message_tuple] - delta_state = DeltaState( - # This is the state reset part. We're removing the room name state. - to_delete=[(EventTypes.Name, "")], - to_insert={}, - ) - assert self.storage_controllers.persistence is not None - sliding_sync_table_changes = self.get_success( - self.storage_controllers.persistence._calculate_sliding_sync_table_changes( - room_id, event_chunk, delta_state - ) - ) self.get_success( self.persist_events_store._persist_events_and_state_updates( room_id, event_chunk, - state_delta_for_room=delta_state, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. 
+ to_delete=[(EventTypes.Name, "")], + to_insert={}, + ), new_forward_extremities={message_tuple[0].event_id}, use_negative_stream_ordering=False, inhibit_local_membership_updates=False, new_event_links={}, - sliding_sync_table_changes=sliding_sync_table_changes, ) ) @@ -2680,27 +2672,19 @@ def test_non_join_state_reset(self) -> None: ) ) event_chunk = [message_tuple] - delta_state = DeltaState( - # This is the state reset part. We're removing the room name state. - to_delete=[(EventTypes.Member, user1_id)], - to_insert={}, - ) - assert self.storage_controllers.persistence is not None - sliding_sync_table_changes = self.get_success( - self.storage_controllers.persistence._calculate_sliding_sync_table_changes( - room_id, event_chunk, delta_state - ) - ) self.get_success( self.persist_events_store._persist_events_and_state_updates( room_id, event_chunk, - state_delta_for_room=delta_state, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. + to_delete=[(EventTypes.Member, user1_id)], + to_insert={}, + ), new_forward_extremities={message_tuple[0].event_id}, use_negative_stream_ordering=False, inhibit_local_membership_updates=False, new_event_links={}, - sliding_sync_table_changes=sliding_sync_table_changes, ) ) From e7a33282284daf636ebc6937239e5d72a7166249 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 16:20:00 -0500 Subject: [PATCH 074/142] Pre-populate `membership` and `membership_event_stream_ordering` See https://github.com/element-hq/synapse/pull/17512#discussion_r1725311745 --- synapse/storage/databases/main/events.py | 56 ++++++++++--------- .../databases/main/events_bg_updates.py | 14 +++-- .../storage/databases/main/events_worker.py | 28 +--------- 3 files changed, 40 insertions(+), 58 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fe93e12eeba..62ae371ca36 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -160,6 +160,8 @@ class SlidingSyncMembershipInfo: user_id: str sender: str membership_event_id: str + membership: str + membership_event_stream_ordering: int @attr.s(slots=True, auto_attribs=True) @@ -401,7 +403,7 @@ async def _calculate_sliding_sync_table_changes( if state_key[0] == EventTypes.Member and self.is_mine_id(state_key[1]): membership_event_id_to_user_id_map[event_id] = state_key[1] - event_id_to_sender_map: Dict[str, str] = {} + membership_event_map: Dict[str, EventBase] = {} # In normal event persist scenarios, we should be able to find the # membership events in the `events_and_contexts` given to us but it's # possible a state reset happened which added us to the room without a @@ -410,36 +412,40 @@ async def _calculate_sliding_sync_table_changes( for membership_event_id in membership_event_id_to_user_id_map.keys(): membership_event = event_map.get(membership_event_id) if membership_event: - event_id_to_sender_map[membership_event_id] = ( - membership_event.sender - ) + membership_event_map[membership_event_id] = membership_event else: missing_membership_event_ids.add(membership_event_id) # Otherwise, we need to find a couple events that we were reset to. 
if missing_membership_event_ids: - remaining_event_id_to_sender_map = ( - await self.store.get_sender_for_event_ids( - missing_membership_event_ids - ) + remaining_events = await self.store.get_events( + missing_membership_event_ids ) # There shouldn't be any missing events assert ( - remaining_event_id_to_sender_map.keys() - == missing_membership_event_ids - ), missing_membership_event_ids.difference( - remaining_event_id_to_sender_map.keys() - ) - event_id_to_sender_map.update(remaining_event_id_to_sender_map) + remaining_events.keys() == missing_membership_event_ids + ), missing_membership_event_ids.difference(remaining_events.keys()) + membership_event_map.update(remaining_events) - membership_infos_to_insert_membership_snapshots = [ - SlidingSyncMembershipInfo( - user_id=user_id, - sender=event_id_to_sender_map[membership_event_id], - membership_event_id=membership_event_id, + for ( + membership_event_id, + user_id, + ) in membership_event_id_to_user_id_map.items(): + # We should only be seeing events with stream_ordering assigned by this point + membership_event_stream_ordering = membership_event_map[ + membership_event_id + ].internal_metadata.stream_ordering + assert membership_event_stream_ordering is not None + + membership_infos_to_insert_membership_snapshots.append( + SlidingSyncMembershipInfo( + user_id=user_id, + sender=membership_event_map[membership_event_id].sender, + membership_event_id=membership_event_id, + membership=membership_event_map[membership_event_id].membership, + membership_event_stream_ordering=membership_event_stream_ordering, + ) ) - for membership_event_id, user_id in membership_event_id_to_user_id_map.items() - ] if membership_infos_to_insert_membership_snapshots: current_state_ids_map: MutableStateMap[str] = dict( @@ -1717,9 +1723,7 @@ def _update_current_state_txn( (room_id, user_id, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) + ?, ?, ?, ?, ? {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) @@ -1734,8 +1738,8 @@ def _update_current_state_txn( room_id, membership_info.user_id, membership_info.membership_event_id, - membership_info.membership_event_id, - membership_info.membership_event_id, + membership_info.membership, + membership_info.membership_event_stream_ordering, ] + list(insert_values) for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 2be7c6df3b5..5d04955bf16 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1960,6 +1960,8 @@ def _find_previous_membership_txn( user_id=user_id, sender=sender, membership_event_id=membership_event_id, + membership=membership, + membership_event_stream_ordering=membership_event_stream_ordering, ) def _backfill_table_txn(txn: LoggingTransaction) -> None: @@ -1967,6 +1969,10 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: room_id, user_id = key membership_info = to_insert_membership_infos[key] membership_event_id = membership_info.membership_event_id + membership = membership_info.membership + membership_event_stream_ordering = ( + membership_info.membership_event_stream_ordering + ) # Pulling keys/values separately is safe and will produce congruent # lists @@ -1980,9 +1986,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: (room_id, user_id, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) + ?, ?, ?, ?, ? {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) @@ -1992,8 +1996,8 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: room_id, user_id, membership_event_id, - membership_event_id, - membership_event_id, + membership, + membership_event_stream_ordering, ] + list(insert_values), ) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 561807a84ef..cf24d845547 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -81,7 +81,7 @@ MultiWriterIdGenerator, ) from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import JsonDict, StrCollection, get_domain_from_id +from synapse.types import JsonDict, get_domain_from_id from synapse.types.state import StateFilter from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation @@ -1981,32 +1981,6 @@ async def get_event_ordering(self, event_id: str, room_id: str) -> Tuple[int, in return int(res[0]), int(res[1]) - async def get_sender_for_event_ids( - self, event_ids: StrCollection - ) -> Mapping[str, str]: - """ - Get the sender for a list of event IDs. - Args: - event_ids: The event IDs to look up. - Returns: - A mapping from event ID to event sender. 
- """ - rows = cast( - List[Tuple[str, str]], - await self.db_pool.simple_select_many_batch( - table="events", - column="event_id", - iterable=event_ids, - retcols=( - "event_id", - "sender", - ), - desc="get_sender_for_event_ids", - ), - ) - - return dict(rows) - async def get_next_event_to_expire(self) -> Optional[Tuple[str, int]]: """Retrieve the entry with the lowest expiry timestamp in the event_expiry table, or None if there's no more event to expire. From 5b1db39bb730e731e7f11488f00f3a9b83be11dd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 16:39:54 -0500 Subject: [PATCH 075/142] Add `sender` column so we can tell leaves from kicks --- synapse/storage/databases/main/events.py | 6 ++++-- synapse/storage/databases/main/events_bg_updates.py | 10 ++++++++-- .../main/delta/87/01_sliding_sync_memberships.sql | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 62ae371ca36..909792095c7 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1720,10 +1720,10 @@ def _update_current_state_txn( txn.execute_batch( f""" INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering + (room_id, user_id, sender, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ? {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) @@ -1737,6 +1737,7 @@ def _update_current_state_txn( [ room_id, membership_info.user_id, + membership_info.sender, membership_info.membership_event_id, membership_info.membership, membership_info.membership_event_stream_ordering, @@ -2693,6 +2694,7 @@ def _store_room_members_txn( raw_stripped_state_events = knock_room_state insert_values = { + "sender": event.sender, "membership_event_id": event.event_id, "membership": event.membership, "event_stream_ordering": event.internal_metadata.stream_ordering, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 5d04955bf16..a2bfaabafdb 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1631,6 +1631,8 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: ) def _backfill_table_txn(txn: LoggingTransaction) -> None: + # Handle updating the `sliding_sync_joined_rooms` table + # last_successful_room_id: Optional[str] = None for room_id, insert_map in joined_room_updates.items(): ( @@ -1965,9 +1967,12 @@ def _find_previous_membership_txn( ) def _backfill_table_txn(txn: LoggingTransaction) -> None: + # Handle updating the `sliding_sync_membership_snapshots` table + # for key, insert_map in to_insert_membership_snapshots.items(): room_id, user_id = key membership_info = to_insert_membership_infos[key] + sender = membership_info.sender membership_event_id = membership_info.membership_event_id membership = membership_info.membership membership_event_stream_ordering = ( @@ -1983,10 +1988,10 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: txn.execute( f""" INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, membership_event_id, membership, event_stream_ordering + (room_id, user_id, sender, membership_event_id, membership, event_stream_ordering {("," + ", ".join(insert_keys)) if 
insert_keys else ""}) VALUES ( - ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ? {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) @@ -1995,6 +2000,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: [ room_id, user_id, + sender, membership_event_id, membership, membership_event_stream_ordering, diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 16b3f84c3d2..5fac6af6193 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -60,6 +60,8 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms( CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( room_id TEXT NOT NULL REFERENCES rooms(room_id), user_id TEXT NOT NULL, + -- Useful to be able to tell leaves from kicks (where the `user_id` is different from the `sender`) + sender TEXT NOT NULL, membership_event_id TEXT NOT NULL REFERENCES events(event_id), membership TEXT NOT NULL, -- `stream_ordering` of the `membership_event_id` From c612572d1208b77c4b4bd032965d8163f55695fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 17:14:34 -0500 Subject: [PATCH 076/142] Move away from `stream_id` See https://github.com/element-hq/synapse/pull/17512#discussion_r1725806543 --- synapse/storage/databases/main/events.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 909792095c7..55b4c87f555 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1610,7 +1610,24 @@ def _update_current_state_txn( if to_insert: args: List[Any] = [ room_id, - stream_id, + # XXX: We can't use `stream_id` for the `event_stream_ordering` + # here because we have a foreign key constraint on + # `event_stream_ordering` that it should point to a valid event. + # When re-syncing the state of a partial-state room, `stream_id` + # is set to the next possible stream position for a future event + # that doesn't exist yet. + # + # Even though `Mapping`/`Dict` have no guaranteed order, some + # implementations may preserve insertion order so we're just + # going to choose the best possible answer by using the "first" + # event ID which we will assume will have the greatest + # `stream_ordering`. We really just need *some* answer in case + # we are the first ones inserting into the table because of the + # `NON NULL` constraint on `event_stream_ordering`. In reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` + # is run after this function to update it to the correct latest + # value. + next(iter(to_insert.values())), ] args.extend(iter(insert_values)) @@ -1626,7 +1643,8 @@ def _update_current_state_txn( INSERT INTO sliding_sync_joined_rooms (room_id, event_stream_ordering, {", ".join(insert_keys)}) VALUES ( - ?, ?, + ?, + (SELECT stream_ordering FROM events WHERE event_id = ?), {", ".join("?" 
for _ in insert_values)} ) ON CONFLICT (room_id) From cda2311520a1d30d41d1936c4066d2e36eb5c6f8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 18:21:44 -0500 Subject: [PATCH 077/142] Add `tombstone_successor_room_id` column --- synapse/api/constants.py | 2 + synapse/storage/databases/main/events.py | 41 ++++++++++++++++--- .../delta/87/01_sliding_sync_memberships.sql | 39 ++++++++++++++---- tests/storage/test_events.py | 23 +++++++---- 4 files changed, 85 insertions(+), 20 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 7dcb1e01fda..8e3b404aed3 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -245,6 +245,8 @@ class EventContentFields: # `m.room.encryption`` algorithm field ENCRYPTION_ALGORITHM: Final = "algorithm" + TOMBSTONE_SUCCESSOR_ROOM: Final = "replacement_room" + class EventUnsignedContentFields: """Fields found inside the 'unsigned' data on events""" diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 55b4c87f555..c628892edf9 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -97,14 +97,16 @@ # State event type/key pairs that we need to gather to fill in the # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. -SLIDING_SYNC_RELEVANT_STATE_SET = { - # So we can fill in the `room_type` column in the `sliding_sync_joined_rooms` table +SLIDING_SYNC_RELEVANT_STATE_SET = ( + # So we can fill in the `room_type` column (EventTypes.Create, ""), - # So we can fill in the `is_encrypted` column in the `sliding_sync_joined_rooms` table + # So we can fill in the `is_encrypted` column (EventTypes.RoomEncryption, ""), - # So we can fill in the `room_name` column in the `sliding_sync_joined_rooms` table + # So we can fill in the `room_name` column (EventTypes.Name, ""), -} + # So we can fill in the `tombstone_successor_room_id` column + (EventTypes.Tombstone, ""), +) @attr.s(slots=True, auto_attribs=True) @@ -1877,11 +1879,22 @@ def _get_sliding_sync_insert_values_from_state_map( # Scrutinize JSON values if room_name is None or isinstance(room_name, str): sliding_sync_insert_map["room_name"] = room_name + elif state_key == (EventTypes.Tombstone, ""): + successor_room_id = event.content.get( + EventContentFields.TOMBSTONE_SUCCESSOR_ROOM + ) + # Scrutinize JSON values + if successor_room_id is None or isinstance(successor_room_id, str): + sliding_sync_insert_map["tombstone_successor_room_id"] = ( + successor_room_id + ) else: # We only expect to see events according to the # `SLIDING_SYNC_RELEVANT_STATE_SET`. 
            raise AssertionError(
-                f"Unexpected event (we should not be fetching extra events): {state_key} {event.event_id}"
+                "Unexpected event (we should not be fetching extra events or this "
+                + "piece of code needs to be updated to handle a new event type added "
+                + f"to `SLIDING_SYNC_RELEVANT_STATE_SET`): {state_key} {event.event_id}"
             )
 
         return sliding_sync_insert_map
@@ -1923,6 +1936,8 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn(
             if create_stripped_event is not None:
                 sliding_sync_insert_map["has_known_state"] = True
 
+                # XXX: Keep this up-to-date with `SLIDING_SYNC_RELEVANT_STATE_SET`
+
                 # Find the room_type
                 sliding_sync_insert_map["room_type"] = (
                     create_stripped_event.content.get(EventContentFields.ROOM_TYPE)
@@ -1951,6 +1966,20 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn(
                     else None
                 )
 
+                # Find the tombstone_successor_room_id
+                # Note: This isn't one of the stripped state events according to the spec
+                # but seems like there is no reason not to support this kind of thing.
+                tombstone_stripped_event = stripped_state_map.get(
+                    (EventTypes.Tombstone, "")
+                )
+                sliding_sync_insert_map["tombstone_successor_room_id"] = (
+                    tombstone_stripped_event.content.get(
+                        EventContentFields.TOMBSTONE_SUCCESSOR_ROOM
+                    )
+                    if tombstone_stripped_event is not None
+                    else None
+                )
+
             else:
                 # No strip state provided
                 sliding_sync_insert_map["has_known_state"] = False
diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql
index 5fac6af6193..27bf460b2ea 100644
--- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql
+++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql
@@ -27,16 +27,32 @@ CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms(
     -- The `stream_ordering` of the last event according to the `bump_event_types`
     bump_stamp BIGINT,
     -- `m.room.create` -> `content.type` (current state)
+    --
+    -- Useful for the `spaces`/`not_spaces` filter in the Sliding Sync API
     room_type TEXT,
     -- `m.room.name` -> `content.name` (current state)
+    --
+    -- Useful for the room meta data and `room_name_like` filter in the Sliding Sync API
     room_name TEXT,
     -- `m.room.encryption` -> `content.algorithm` (current state)
+    --
+    -- Useful for the `is_encrypted` filter in the Sliding Sync API
     is_encrypted BOOLEAN DEFAULT FALSE NOT NULL,
-    -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms`
-    -- (tracked by https://github.com/element-hq/synapse/issues/17540)
+    -- `m.room.tombstone` -> `content.replacement_room` (according to the current state at the
+    -- time of the membership).
+    --
+    -- Useful for the `include_old_rooms` functionality in the Sliding Sync API
+    tombstone_successor_room_id TEXT,
     PRIMARY KEY (room_id)
 );
 
+-- So we can purge rooms easily.
+--
+-- The primary key is already `room_id`
+
+-- So we can sort by `stream_ordering`
+CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_joined_rooms_event_stream_ordering ON sliding_sync_joined_rooms(event_stream_ordering);
+
 -- A table for storing a snapshot of room meta data (historical current state relevant
 -- for sliding sync) at the time of a local user's membership. Only has rows for the
 -- latest membership event for a given local user in a room which matches
@@ -72,16 +88,25 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots(
     -- no stripped state was provided for a remote invite/knock (False).
has_known_state BOOLEAN DEFAULT FALSE NOT NULL, -- `m.room.create` -> `content.type` (according to the current state at the time of - -- the membership) + -- the membership). + -- + -- Useful for the `spaces`/`not_spaces` filter in the Sliding Sync API room_type TEXT, -- `m.room.name` -> `content.name` (according to the current state at the time of - -- the membership) + -- the membership). + -- + -- Useful for the room meta data and `room_name_like` filter in the Sliding Sync API room_name TEXT, -- `m.room.encryption` -> `content.algorithm` (according to the current state at the - -- time of the membership) + -- time of the membership). + -- + -- Useful for the `is_encrypted` filter in the Sliding Sync API is_encrypted BOOLEAN DEFAULT FALSE NOT NULL, - -- FIXME: Maybe we want to add `tombstone_successor_room_id` here to help with `include_old_rooms` - -- (tracked by https://github.com/element-hq/synapse/issues/17540) + -- `m.room.tombstone` -> `content.replacement_room` (according to the current state at the + -- time of the membership). + -- + -- Useful for the `include_old_rooms` functionality in the Sliding Sync API + tombstone_successor_room_id TEXT, PRIMARY KEY (room_id, user_id) ); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 9e5c0e2bf81..4ed7d19ac54 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -506,12 +506,14 @@ class _SlidingSyncJoinedRoomResult: room_type: Optional[str] room_name: Optional[str] is_encrypted: bool + tombstone_successor_room_id: Optional[str] @attr.s(slots=True, frozen=True, auto_attribs=True) class _SlidingSyncMembershipSnapshotResult: room_id: str user_id: str + sender: str membership_event_id: str membership: str # `event_stream_ordering` is only optional to allow easier semantics when we make @@ -524,6 +526,7 @@ class _SlidingSyncMembershipSnapshotResult: room_type: Optional[str] room_name: Optional[str] is_encrypted: bool + tombstone_successor_room_id: Optional[str] class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): @@ -566,6 +569,7 @@ def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResu "room_type", "room_name", "is_encrypted", + "tombstone_successor_room_id", ), ), ), @@ -579,6 +583,7 @@ def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResu room_type=row[3], room_name=row[4], is_encrypted=bool(row[5]), + tombstone_successor_room_id=row[6], ) for row in rows } @@ -601,6 +606,7 @@ def _get_sliding_sync_membership_snapshots( retcols=( "room_id", "user_id", + "sender", "membership_event_id", "membership", "event_stream_ordering", @@ -608,6 +614,7 @@ def _get_sliding_sync_membership_snapshots( "room_type", "room_name", "is_encrypted", + "tombstone_successor_room_id", ), ), ), @@ -617,13 +624,15 @@ def _get_sliding_sync_membership_snapshots( (row[0], row[1]): _SlidingSyncMembershipSnapshotResult( room_id=row[0], user_id=row[1], - membership_event_id=row[2], - membership=row[3], - event_stream_ordering=row[4], - has_known_state=bool(row[5]), - room_type=row[6], - room_name=row[7], - is_encrypted=bool(row[8]), + sender=row[2], + membership_event_id=row[3], + membership=row[4], + event_stream_ordering=row[5], + has_known_state=bool(row[6]), + room_type=row[7], + room_name=row[8], + is_encrypted=bool(row[9]), + tombstone_successor_room_id=row[10], ) for row in rows } From 513ec8e90630d207aa82fa7a5f7bb03b4007dfd0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 18:51:04 -0500 Subject: [PATCH 078/142] Update 
tests --- synapse/storage/databases/main/events.py | 3 +- tests/storage/test_events.py | 162 +++++++++++++++++++++-- 2 files changed, 155 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c628892edf9..2965821f848 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -139,6 +139,7 @@ class SlidingSyncStateInsertValues(TypedDict, total=False): room_type: Optional[str] is_encrypted: Optional[bool] room_name: Optional[str] + tombstone_successor_room_id: Optional[str] class SlidingSyncMembershipSnapshotSharedInsertValues( @@ -150,7 +151,6 @@ class SlidingSyncMembershipSnapshotSharedInsertValues( """ has_known_state: Optional[bool] - # TODO: tombstone_successor_room_id: Optional[str] @attr.s(slots=True, auto_attribs=True) @@ -1748,6 +1748,7 @@ def _update_current_state_txn( ) ON CONFLICT (room_id, user_id) DO UPDATE SET + sender = EXCLUDED.sender, membership_event_id = EXCLUDED.membership_event_id, membership = EXCLUDED.membership, event_stream_ordering = EXCLUDED.event_stream_ordering diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 4ed7d19ac54..8e04952e5e4 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -557,7 +557,7 @@ def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResu Mapping from room_id to _SlidingSyncJoinedRoomResult. """ rows = cast( - List[Tuple[str, int, int, str, str, bool]], + List[Tuple[str, int, int, str, str, bool, str]], self.get_success( self.store.db_pool.simple_select_list( "sliding_sync_joined_rooms", @@ -598,7 +598,7 @@ def _get_sliding_sync_membership_snapshots( Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. 
""" rows = cast( - List[Tuple[str, str, str, str, int, bool, str, str, bool]], + List[Tuple[str, str, str, str, str, int, bool, str, str, bool, str]], self.get_success( self.store.db_pool.simple_select_list( "sliding_sync_membership_snapshots", @@ -799,6 +799,7 @@ def test_joined_room_with_no_info(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -818,6 +819,7 @@ def test_joined_room_with_no_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -827,6 +829,7 @@ def test_joined_room_with_no_info(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -882,6 +885,7 @@ def test_joined_room_with_info(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -902,6 +906,7 @@ def test_joined_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -911,6 +916,7 @@ def test_joined_room_with_info(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -919,6 +925,7 @@ def test_joined_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -931,6 +938,7 @@ def test_joined_room_with_info(self) -> None: # this state set yet. room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -985,6 +993,7 @@ def test_joined_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1005,6 +1014,7 @@ def test_joined_space_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1014,6 +1024,7 @@ def test_joined_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -1022,6 +1033,7 @@ def test_joined_space_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1033,6 +1045,7 @@ def test_joined_space_room_with_info(self) -> None: # joined at the room creation time which didn't have this state set yet. 
room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1082,6 +1095,7 @@ def test_joined_room_with_state_updated(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1133,6 +1147,7 @@ def test_joined_room_with_state_updated(self) -> None: room_type=None, room_name="my super duper room was renamed", is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -1153,6 +1168,7 @@ def test_joined_room_with_state_updated(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1162,6 +1178,7 @@ def test_joined_room_with_state_updated(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -1170,6 +1187,7 @@ def test_joined_room_with_state_updated(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1179,6 +1197,7 @@ def test_joined_room_with_state_updated(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1228,6 +1247,7 @@ def test_joined_room_is_bumped(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1246,6 +1266,7 @@ def test_joined_room_is_bumped(self) -> None: user1_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1255,6 +1276,7 @@ def test_joined_room_is_bumped(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), @@ -1264,6 +1286,7 @@ def test_joined_room_is_bumped(self) -> None: user2_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1273,6 +1296,7 @@ def test_joined_room_is_bumped(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), @@ -1304,6 +1328,7 @@ def test_joined_room_is_bumped(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1373,6 +1398,7 @@ def test_joined_room_meta_state_reset(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1390,6 +1416,7 @@ def test_joined_room_meta_state_reset(self) -> None: user1_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1399,6 +1426,7 @@ def test_joined_room_meta_state_reset(self) -> None: 
room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id, user1_id)), @@ -1409,6 +1437,7 @@ def test_joined_room_meta_state_reset(self) -> None: user2_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1418,6 +1447,7 @@ def test_joined_room_meta_state_reset(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id, user2_id)), @@ -1480,6 +1510,7 @@ def test_joined_room_meta_state_reset(self) -> None: # This was state reset back to None room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1579,6 +1610,7 @@ def test_non_join_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space was renamed", is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -1599,6 +1631,7 @@ def test_non_join_space_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invited_response["event_id"], membership=Membership.INVITE, event_stream_ordering=user1_invited_event_pos.stream, @@ -1606,6 +1639,7 @@ def test_non_join_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=True, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -1614,6 +1648,7 @@ def test_non_join_space_room_with_info(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1623,6 +1658,7 @@ def test_non_join_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1681,6 +1717,7 @@ def test_non_join_invite_ban(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1702,6 +1739,7 @@ def test_non_join_invite_ban(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invited_response["event_id"], membership=Membership.INVITE, event_stream_ordering=user1_invited_event_pos.stream, @@ -1709,6 +1747,7 @@ def test_non_join_invite_ban(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -1717,6 +1756,7 @@ def test_non_join_invite_ban(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1726,6 +1766,7 @@ def test_non_join_invite_ban(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user was banned @@ -1734,6 +1775,7 @@ def test_non_join_invite_ban(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, 
user_id=user3_id, + sender=user2_id, membership_event_id=user3_ban_response["event_id"], membership=Membership.BAN, event_stream_ordering=user3_ban_event_pos.stream, @@ -1741,6 +1783,7 @@ def test_non_join_invite_ban(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1795,6 +1838,7 @@ def test_non_join_reject_invite_empty_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=user1_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user1_leave_event_pos.stream, @@ -1802,6 +1846,7 @@ def test_non_join_reject_invite_empty_room(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the left @@ -1810,6 +1855,7 @@ def test_non_join_reject_invite_empty_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=user2_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user2_leave_event_pos.stream, @@ -1817,6 +1863,7 @@ def test_non_join_reject_invite_empty_room(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1874,6 +1921,7 @@ def test_membership_changing(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1895,6 +1943,7 @@ def test_membership_changing(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invited_response["event_id"], membership=Membership.INVITE, event_stream_ordering=user1_invited_event_pos.stream, @@ -1904,12 +1953,14 @@ def test_membership_changing(self) -> None: # see it unset here room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined user2_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -1919,6 +1970,7 @@ def test_membership_changing(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), @@ -1951,6 +2003,7 @@ def test_membership_changing(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -1972,6 +2025,7 @@ def test_membership_changing(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=user1_joined_response["event_id"], membership=Membership.JOIN, event_stream_ordering=user1_joined_event_pos.stream, @@ -1981,6 +2035,7 @@ def test_membership_changing(self) -> None: # change room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -2017,6 +2072,7 @@ def test_membership_changing(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2038,6 +2094,7 @@ def test_membership_changing(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, 
user_id=user1_id, + sender=user2_id, membership_event_id=user1_ban_response["event_id"], membership=Membership.BAN, event_stream_ordering=user1_ban_event_pos.stream, @@ -2047,6 +2104,7 @@ def test_membership_changing(self) -> None: # change room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) # Holds the info according to the current state when the user joined @@ -2108,6 +2166,7 @@ def test_non_join_server_left_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user1_id, + sender=user1_id, membership_event_id=user1_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user1_leave_event_pos.stream, @@ -2115,6 +2174,7 @@ def test_non_join_server_left_room(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -2122,6 +2182,7 @@ def test_non_join_server_left_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id1, user_id=user2_id, + sender=user2_id, membership_event_id=user2_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user2_leave_event_pos.stream, @@ -2129,6 +2190,7 @@ def test_non_join_server_left_room(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2180,6 +2242,7 @@ def test_non_join_remote_invite_no_stripped_state( _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=remote_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, @@ -2188,6 +2251,7 @@ def test_non_join_remote_invite_no_stripped_state( room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2251,6 +2315,7 @@ def test_non_join_remote_invite_unencrypted_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=remote_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, @@ -2258,6 +2323,7 @@ def test_non_join_remote_invite_unencrypted_room(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2321,6 +2387,7 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=remote_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, @@ -2328,6 +2395,7 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: room_type=None, room_name=None, is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -2401,6 +2469,7 @@ def test_non_join_remote_invite_space_room(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=remote_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, @@ -2408,6 +2477,7 @@ def test_non_join_remote_invite_space_room(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -2480,6 +2550,7 @@ 
def test_non_join_rejected_remote_invite(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender=user1_id, membership_event_id=user1_leave_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=user1_leave_pos.stream, @@ -2487,6 +2558,7 @@ def test_non_join_rejected_remote_invite(self) -> None: room_type=None, room_name=None, is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -2558,6 +2630,7 @@ def test_non_join_retracted_remote_invite(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=remote_invite_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=remote_invite_retraction_event.event_id, membership=Membership.LEAVE, event_stream_ordering=remote_invite_retraction_event.internal_metadata.stream_ordering, @@ -2565,6 +2638,7 @@ def test_non_join_retracted_remote_invite(self) -> None: room_type=None, room_name=None, is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -2613,6 +2687,7 @@ def test_non_join_state_reset(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2630,6 +2705,7 @@ def test_non_join_state_reset(self) -> None: user1_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -2639,6 +2715,7 @@ def test_non_join_state_reset(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id, user1_id)), @@ -2649,6 +2726,7 @@ def test_non_join_state_reset(self) -> None: user2_snapshot = _SlidingSyncMembershipSnapshotResult( room_id=room_id, user_id=user2_id, + sender=user2_id, membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -2658,6 +2736,7 @@ def test_non_join_state_reset(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ) self.assertEqual( sliding_sync_membership_snapshots_results.get((room_id, user2_id)), @@ -2719,6 +2798,7 @@ def test_non_join_state_reset(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2840,6 +2920,7 @@ def test_joined_background_update_missing(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) state_map = self.get_success( @@ -2859,6 +2940,7 @@ def test_joined_background_update_missing(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) state_map = self.get_success( @@ -2878,6 +2960,7 @@ def test_joined_background_update_missing(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2943,6 +3026,7 @@ def test_joined_background_update_partial(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -2980,6 +3064,7 @@ def test_joined_background_update_partial(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) @@ -3083,6 +3168,7 @@ def 
test_membership_snapshots_background_update_joined(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id_no_info, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -3092,6 +3178,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) state_map = self.get_success( @@ -3104,6 +3191,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id_with_info, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -3113,6 +3201,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) state_map = self.get_success( @@ -3123,6 +3212,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender=user1_id, membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, membership=Membership.JOIN, event_stream_ordering=state_map[ @@ -3132,6 +3222,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -3269,6 +3360,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id_no_info, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invite_room_id_no_info_response["event_id"], membership=Membership.INVITE, event_stream_ordering=self.get_success( @@ -3280,6 +3372,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3289,6 +3382,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=room_id_with_info, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invite_room_id_with_info_response["event_id"], membership=Membership.INVITE, event_stream_ordering=self.get_success( @@ -3300,6 +3394,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3307,6 +3402,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender=user2_id, membership_event_id=user1_invite_space_room_id_response["event_id"], membership=Membership.INVITE, event_stream_ordering=self.get_success( @@ -3318,6 +3414,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -3476,6 +3573,7 @@ def test_membership_snapshots_background_update_remote_invite( _SlidingSyncMembershipSnapshotResult( room_id=room_id_unknown_state, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=room_id_unknown_state_invite_event.event_id, 
membership=Membership.INVITE, event_stream_ordering=room_id_unknown_state_invite_event.internal_metadata.stream_ordering, @@ -3483,6 +3581,7 @@ def test_membership_snapshots_background_update_remote_invite( room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3490,6 +3589,7 @@ def test_membership_snapshots_background_update_remote_invite( _SlidingSyncMembershipSnapshotResult( room_id=room_id_no_info, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=room_id_no_info_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=room_id_no_info_invite_event.internal_metadata.stream_ordering, @@ -3497,6 +3597,7 @@ def test_membership_snapshots_background_update_remote_invite( room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3506,6 +3607,7 @@ def test_membership_snapshots_background_update_remote_invite( _SlidingSyncMembershipSnapshotResult( room_id=room_id_with_info, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=room_id_with_info_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=room_id_with_info_invite_event.internal_metadata.stream_ordering, @@ -3513,6 +3615,7 @@ def test_membership_snapshots_background_update_remote_invite( room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3520,6 +3623,7 @@ def test_membership_snapshots_background_update_remote_invite( _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=space_room_id_invite_event.event_id, membership=Membership.INVITE, event_stream_ordering=space_room_id_invite_event.internal_metadata.stream_ordering, @@ -3527,6 +3631,7 @@ def test_membership_snapshots_background_update_remote_invite( room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @@ -3702,6 +3807,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret _SlidingSyncMembershipSnapshotResult( room_id=room_id_unknown_state, user_id=user1_id, + sender=user1_id, membership_event_id=room_id_unknown_state_leave_event_response[ "event_id" ], @@ -3715,6 +3821,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3722,6 +3829,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret _SlidingSyncMembershipSnapshotResult( room_id=room_id_no_info, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=room_id_no_info_leave_event.event_id, membership=Membership.LEAVE, event_stream_ordering=room_id_no_info_leave_event.internal_metadata.stream_ordering, @@ -3729,6 +3837,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3738,6 +3847,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret _SlidingSyncMembershipSnapshotResult( room_id=room_id_with_info, user_id=user1_id, + sender=user1_id, membership_event_id=room_id_with_info_leave_event_response["event_id"], membership=Membership.LEAVE, event_stream_ordering=self.get_success( @@ -3749,6 +3859,7 @@ def 
test_membership_snapshots_background_update_remote_invite_rejections_and_ret room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3756,6 +3867,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + sender="@inviter:remote_server", membership_event_id=space_room_id_leave_event.event_id, membership=Membership.LEAVE, event_stream_ordering=space_room_id_leave_event.internal_metadata.stream_ordering, @@ -3763,11 +3875,13 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) @parameterized.expand( [ + # We'll do a kick for this (Membership.LEAVE,), (Membership.BAN,), ] @@ -3825,15 +3939,36 @@ def test_membership_snapshots_background_update_historical_state( self.helper.join(space_room_id, user1_id, tok=user1_tok) if test_membership == Membership.LEAVE: - # Have user1 leave the rooms - user1_membership_room_id_no_info_response = self.helper.leave( - room_id_no_info, user1_id, tok=user1_tok + # Kick user1 from the rooms + user1_membership_room_id_no_info_response = self.helper.change_membership( + room=room_id_no_info, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, ) - user1_membership_room_id_with_info_response = self.helper.leave( - room_id_with_info, user1_id, tok=user1_tok + user1_membership_room_id_with_info_response = self.helper.change_membership( + room=room_id_with_info, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, ) - user1_membership_space_room_id_response = self.helper.leave( - space_room_id, user1_id, tok=user1_tok + user1_membership_space_room_id_response = self.helper.change_membership( + room=space_room_id, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, ) elif test_membership == Membership.BAN: # Ban user1 from the rooms @@ -3927,6 +4062,8 @@ def test_membership_snapshots_background_update_historical_state( _SlidingSyncMembershipSnapshotResult( room_id=room_id_no_info, user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, membership_event_id=user1_membership_room_id_no_info_response[ "event_id" ], @@ -3940,6 +4077,7 @@ def test_membership_snapshots_background_update_historical_state( room_type=None, room_name=None, is_encrypted=False, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3949,6 +4087,8 @@ def test_membership_snapshots_background_update_historical_state( _SlidingSyncMembershipSnapshotResult( room_id=room_id_with_info, user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, membership_event_id=user1_membership_room_id_with_info_response[ "event_id" ], @@ -3962,6 +4102,7 @@ def test_membership_snapshots_background_update_historical_state( room_type=None, room_name="my super duper room", is_encrypted=True, + tombstone_successor_room_id=None, ), ) self.assertEqual( @@ -3969,6 +4110,8 @@ def test_membership_snapshots_background_update_historical_state( _SlidingSyncMembershipSnapshotResult( room_id=space_room_id, user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, 
membership_event_id=user1_membership_space_room_id_response["event_id"], membership=test_membership, event_stream_ordering=self.get_success( @@ -3980,5 +4123,6 @@ def test_membership_snapshots_background_update_historical_state( room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=False, + tombstone_successor_room_id=None, ), ) From 8ddf5c72353d3db5857524fa8bbaae1886199352 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 19:05:59 -0500 Subject: [PATCH 079/142] Add tombstone to tests --- tests/storage/test_events.py | 80 +++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 8e04952e5e4..4ff7f03b712 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -857,6 +857,13 @@ def test_joined_room_with_info(self) -> None: {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user2_tok, ) + # Add a tombstone + self.helper.send_state( + room_id1, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) # User1 joins the room self.helper.join(room_id1, user1_id, tok=user1_tok) @@ -885,7 +892,7 @@ def test_joined_room_with_info(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) @@ -916,7 +923,7 @@ def test_joined_room_with_info(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) # Holds the info according to the current state when the user joined @@ -933,9 +940,9 @@ def test_joined_room_with_info(self) -> None: ].internal_metadata.stream_ordering, has_known_state=True, room_type=None, - # Even though this room does have a name and is encrypted, user2 is the - # room creator and joined at the room creation time which didn't have - # this state set yet. + # Even though this room does have a name, is encrypted, and has a + # tombstone, user2 is the room creator and joined at the room creation + # time which didn't have this state set yet. room_name=None, is_encrypted=False, tombstone_successor_room_id=None, @@ -1566,6 +1573,13 @@ def test_non_join_space_room_with_info(self) -> None: {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user2_tok, ) + # Add a tombstone + self.helper.send_state( + space_room_id, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) # User1 is invited to the room user1_invited_response = self.helper.invite( @@ -1610,7 +1624,7 @@ def test_non_join_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space was renamed", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) @@ -1639,7 +1653,7 @@ def test_non_join_space_room_with_info(self) -> None: room_type=RoomTypes.SPACE, room_name="my super duper space", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) # Holds the info according to the current state when the user joined @@ -2358,6 +2372,25 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", }, ), + # This is not one of the stripped state events according to the state + # but we still handle it. 
+ StrippedStateEvent( + type=EventTypes.Tombstone, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room", + }, + ), + # Also test a random event that we don't care about + StrippedStateEvent( + type="org.matrix.foo_state", + state_key="", + sender="@inviter:remote_server", + content={ + "foo": "qux", + }, + ), ], ) ) @@ -2395,7 +2428,7 @@ def test_non_join_remote_invite_encrypted_room(self) -> None: room_type=None, room_name=None, is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) @@ -2481,7 +2514,7 @@ def test_non_join_remote_invite_space_room(self) -> None: ), ) - def test_non_join_rejected_remote_invite(self) -> None: + def test_non_join_reject_remote_invite(self) -> None: """ Test rejected remote invite (user decided to leave the room) inherits meta data from when the remote invite stripped state and shows up in @@ -3095,6 +3128,13 @@ def test_membership_snapshots_background_update_joined(self) -> None: {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user1_tok, ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user1_tok, + ) space_room_id = self.helper.create_room_as( user1_id, @@ -3201,7 +3241,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) state_map = self.get_success( @@ -3255,6 +3295,13 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user2_tok, ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) space_room_id = self.helper.create_room_as( user1_id, @@ -3394,6 +3441,10 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: room_type=None, room_name="my super duper room", is_encrypted=True, + # The tombstone isn't showing here ("another_room") because it's not one + # of the stripped events that we hand out as part of the invite event. + # Even though we handle this scenario from other remote homservers, + # Synapse does not include the tombstone in the invite event. 
tombstone_successor_room_id=None, ), ) @@ -3917,6 +3968,13 @@ def test_membership_snapshots_background_update_historical_state( {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user2_tok, ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) space_room_id = self.helper.create_room_as( user1_id, @@ -4102,7 +4160,7 @@ def test_membership_snapshots_background_update_historical_state( room_type=None, room_name="my super duper room", is_encrypted=True, - tombstone_successor_room_id=None, + tombstone_successor_room_id="another_room", ), ) self.assertEqual( From 02711552cf001615da612ea8a65987a2f2fdea13 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 19:11:08 -0500 Subject: [PATCH 080/142] Better handle none case --- synapse/storage/databases/main/stream.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 395735cd43c..879f6febe45 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1310,14 +1310,17 @@ def _get_last_event_pos_in_room_txn( [room_id] + event_type_args, ) - row = cast(Tuple[str, int, str], txn.fetchone()) - event_id, stream_ordering, instance_name = row + row = cast(Optional[Tuple[str, int, str]], txn.fetchone()) + if row is not None: + event_id, stream_ordering, instance_name = row - return event_id, PersistedEventPosition( - # If instance_name is null we default to "master" - instance_name or "master", - stream_ordering, - ) + return event_id, PersistedEventPosition( + # If instance_name is null we default to "master" + instance_name or "master", + stream_ordering, + ) + + return None return await self.db_pool.runInteraction( "get_last_event_pos_in_room", From 97248362d09099177c1903509594abe512a6fa5f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 19:26:14 -0500 Subject: [PATCH 081/142] Log which room is strange --- synapse/storage/databases/main/events_bg_updates.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index a2bfaabafdb..bb881e4b526 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1613,15 +1613,16 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: most_recent_event_pos_results = await self.get_last_event_pos_in_room( room_id, event_types=None ) - assert ( - most_recent_event_pos_results - ), "We should not be seeing `None` here because the room should at-least have a create event" + assert most_recent_event_pos_results, ( + f"We should not be seeing `None` here because the room ({room_id}) should at-least have a create event " + + "given we pulled the room out of `current_state_events`" + ) # Figure out the latest bump_stamp in the room bump_stamp_event_pos_results = await self.get_last_event_pos_in_room( room_id, event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES ) assert bump_stamp_event_pos_results, ( - "We should not be seeing `None` here because the room should at-least have a create event " + f"We should not be seeing `None` here because the room ({room_id}) should at-least have a create event " + "(unless `SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES` no longer includes the room create 
event)" ) joined_room_stream_ordering_updates[room_id] = ( From 0a938b137adca0f1e4044169fc60e3804e5d4314 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 19:59:27 -0500 Subject: [PATCH 082/142] Add missing boolean column to portdb script --- synapse/_scripts/synapse_port_db.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 49088dc506e..195c95d3766 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -130,7 +130,10 @@ "room_stats_state": ["is_federatable"], "rooms": ["is_public", "has_auth_chain_index"], "sliding_sync_joined_rooms": ["is_encrypted"], - "sliding_sync_membership_snapshots": ["is_encrypted"], + "sliding_sync_membership_snapshots": [ + "has_known_state", + "is_encrypted", + ], "users": ["shadow_banned", "approved", "locked", "suspended"], "un_partial_stated_event_stream": ["rejection_status_changed"], "users_who_share_rooms": ["share_private"], From ee2ef0b4d91f2519348780cc6f1f049d6afd7cf0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 21:54:22 -0500 Subject: [PATCH 083/142] Add `forgotten` column --- .../databases/main/events_bg_updates.py | 20 +- synapse/storage/databases/main/roommember.py | 6 + .../delta/87/01_sliding_sync_memberships.sql | 3 + tests/storage/test_events.py | 446 +++++++++++++++++- 4 files changed, 459 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index bb881e4b526..253c324f9a3 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1984,19 +1984,27 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: # lists insert_keys = insert_map.keys() insert_values = insert_map.values() - # We don't need to do anything `ON CONFLICT` because we never partially - # insert/update the snapshots + # We don't need to update the state `ON CONFLICT` because we never + # partially insert/update the snapshots and anything already there is + # up-to-date EXCEPT for the `forgotten` field since that is updated out + # of band from the membership changes. + # + # We need to find the `forgotten` value during the transaction because + # we can't risk inserting stale data. txn.execute( f""" INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, sender, membership_event_id, membership, event_stream_ordering + (room_id, user_id, sender, membership_event_id, membership, forgotten, event_stream_ordering {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( - ?, ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, + (SELECT forgotten FROM room_memberships WHERE room_id = ? AND user_id = ?), + ? {("," + ", ".join("?" 
for _ in insert_values)) if insert_values else ""} ) ON CONFLICT (room_id, user_id) - DO NOTHING + DO UPDATE SET + forgotten = EXCLUDED.forgotten """, [ room_id, @@ -2004,6 +2012,8 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: sender, membership_event_id, membership, + room_id, + user_id, membership_event_stream_ordering, ] + list(insert_values), diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 1d9f0f52e19..5a0d7c7df36 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1337,6 +1337,12 @@ def f(txn: LoggingTransaction) -> None: keyvalues={"user_id": user_id, "room_id": room_id}, updatevalues={"forgotten": 1}, ) + self.db_pool.simple_update_txn( + txn, + table="sliding_sync_membership_snapshots", + keyvalues={"user_id": user_id, "room_id": room_id}, + updatevalues={"forgotten": 1}, + ) self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) self._invalidate_cache_and_stream( diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 27bf460b2ea..680f0ea8d16 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -80,6 +80,9 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( sender TEXT NOT NULL, membership_event_id TEXT NOT NULL REFERENCES events(event_id), membership TEXT NOT NULL, + -- This is an integer just to match `room_memberships` and also means we don't need + -- to do any casting. + forgotten INTEGER DEFAULT 0 NOT NULL, -- `stream_ordering` of the `membership_event_id` event_stream_ordering BIGINT NOT NULL REFERENCES events(stream_ordering), -- For remote invites/knocks that don't include any stripped state, we want to be diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 4ff7f03b712..2b8e93e3ed5 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -527,6 +527,9 @@ class _SlidingSyncMembershipSnapshotResult: room_name: Optional[str] is_encrypted: bool tombstone_successor_room_id: Optional[str] + # Make this default to "not forgotten" because it doesn't apply to many tests and we + # don't want to force all of the tests to deal with it. + forgotten: bool = False class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): @@ -598,7 +601,7 @@ def _get_sliding_sync_membership_snapshots( Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. 
""" rows = cast( - List[Tuple[str, str, str, str, str, int, bool, str, str, bool, str]], + List[Tuple[str, str, str, str, str, int, int, bool, str, str, bool, str]], self.get_success( self.store.db_pool.simple_select_list( "sliding_sync_membership_snapshots", @@ -609,6 +612,7 @@ def _get_sliding_sync_membership_snapshots( "sender", "membership_event_id", "membership", + "forgotten", "event_stream_ordering", "has_known_state", "room_type", @@ -627,12 +631,13 @@ def _get_sliding_sync_membership_snapshots( sender=row[2], membership_event_id=row[3], membership=row[4], - event_stream_ordering=row[5], - has_known_state=bool(row[6]), - room_type=row[7], - room_name=row[8], - is_encrypted=bool(row[9]), - tombstone_successor_room_id=row[10], + forgotten=bool(row[5]), + event_stream_ordering=row[6], + has_known_state=bool(row[7]), + room_type=row[8], + room_name=row[9], + is_encrypted=bool(row[10]), + tombstone_successor_room_id=row[11], ) for row in rows } @@ -3356,7 +3361,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: column="room_id", iterable=(room_id_no_info, room_id_with_info, space_room_id), keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_local_invite", ) ) @@ -3574,7 +3579,7 @@ def test_membership_snapshots_background_update_remote_invite( space_room_id, ), keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite", ) ) @@ -3808,7 +3813,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret space_room_id, ), keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite_rejections_and_retractions", ) ) @@ -4069,7 +4074,7 @@ def test_membership_snapshots_background_update_historical_state( column="room_id", iterable=(room_id_no_info, room_id_with_info, space_room_id), keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_invite", + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_historical_state", ) ) @@ -4184,3 +4189,422 @@ def test_membership_snapshots_background_update_historical_state( tombstone_successor_room_id=None, ), ) + + def test_membership_snapshots_background_update_forgotten_missing(self) -> None: + """ + Test that a new row is inserted into `sliding_sync_membership_snapshots` when it + doesn't exist in the table yet. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is forgotten + forgotten=True, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_forgotten_partial(self) -> None: + """ + Test an existing `sliding_sync_membership_snapshots` row is updated with the + latest `forgotten` status after the 
background update passes over it. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Clean-up the `sliding_sync_joined_rooms` table as if the forgotten status + # never made it into the table. + self.get_success( + self.store.db_pool.simple_update( + table="sliding_sync_membership_snapshots", + keyvalues={"room_id": room_id}, + updatevalues={"forgotten": 0}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_partial", + ) + ) + + # We should see the partial row that we made in preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is *not* forgotten because of our test preparation + forgotten=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Forgotten status is now updated + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + attr.evolve(user1_snapshot, forgotten=True), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_membership_snapshot_forget(self) -> None: + """ + Test forgetting a room will update `sliding_sync_membership_snapshots` + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Check on the `sliding_sync_membership_snapshots` table (nothing should be + # forgotten yet) + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is not forgotten + forgotten=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Check on the `sliding_sync_membership_snapshots` 
table + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Room is now forgotten for user1 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + attr.evolve(user1_snapshot, forgotten=True), + ) + # Nothing changed for user2 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_membership_snapshot_missing_forget( + self, + ) -> None: + """ + Test forgetting a room with no existing row in `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # It doesn't explode + + # We still shouldn't find anything in the table because nothing has re-created them + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) From b45b1896aafbb4e22e0f9912cb73edc9969aece1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 22:09:57 -0500 Subject: [PATCH 084/142] Fill out docstring --- synapse/storage/databases/main/events.py | 31 +++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2965821f848..5cf0f5dbd0e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -271,7 +271,6 @@ async def _persist_events_and_state_updates( from being updated by these events. This should be set to True for backfilled events because backfilled events in the past do not affect the current local state. - sliding_sync_table_changes: TODO Returns: Resolves when the events have been persisted @@ -792,7 +791,10 @@ def _persist_events_txn( state_delta_for_room: The current-state delta for the room. new_forward_extremities: The new forward extremities for the room: a set of the event ids which are the forward extremities. 
- sliding_sync_table_changes: TODO + sliding_sync_table_changes: Changes to the + `sliding_sync_membership_snapshots` and `sliding_sync_joined_rooms` tables + derived from the given `delta_state` (see + `_calculate_sliding_sync_table_changes(...)`) Raises: PartialStateConflictError: if attempting to persist a partial state event in @@ -1453,7 +1455,17 @@ async def update_current_state( state_delta: DeltaState, sliding_sync_table_changes: SlidingSyncTableChanges, ) -> None: - """Update the current state stored in the datatabase for the given room""" + """ + Update the current state stored in the datatabase for the given room + + Args: + room_id + state_delta: Changes to the current state of the room + sliding_sync_table_changes: Changes to the + `sliding_sync_membership_snapshots` and `sliding_sync_joined_rooms` tables + derived from the given `delta_state` (see + `_calculate_sliding_sync_table_changes(...)`) + """ if state_delta.is_noop(): return @@ -1476,6 +1488,19 @@ def _update_current_state_txn( stream_id: int, sliding_sync_table_changes: SlidingSyncTableChanges, ) -> None: + """ + Handles updating tables that track the current state of a room. + + Args: + txn + room_id + delta_state: Changes to the current state of the room + stream_id: TODO + sliding_sync_table_changes: Changes to the + `sliding_sync_membership_snapshots` and `sliding_sync_joined_rooms` tables + derived from the given `delta_state` (see + `_calculate_sliding_sync_table_changes(...)`) + """ to_delete = delta_state.to_delete to_insert = delta_state.to_insert From 31300f4ce57fd9aaed89d11d6e8baa22dce41c55 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 22:15:19 -0500 Subject: [PATCH 085/142] More docstring --- synapse/storage/databases/main/events.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 5cf0f5dbd0e..61a97477f88 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -376,7 +376,7 @@ async def _calculate_sliding_sync_table_changes( save us from fetching the events from the database if we already have them. delta_state: Deltas that are going to be used to update the - `current_state_events` table. + `current_state_events` table. Changes to the current state of the room. """ to_insert = delta_state.to_insert to_delete = delta_state.to_delete @@ -788,7 +788,8 @@ def _persist_events_txn( delete_existing True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - state_delta_for_room: The current-state delta for the room. + state_delta_for_room: Deltas that are going to be used to update the + `current_state_events` table. Changes to the current state of the room. new_forward_extremities: The new forward extremities for the room: a set of the event ids which are the forward extremities. sliding_sync_table_changes: Changes to the @@ -1460,7 +1461,8 @@ async def update_current_state( Args: room_id - state_delta: Changes to the current state of the room + state_delta: Deltas that are going to be used to update the + `current_state_events` table. Changes to the current state of the room. 
sliding_sync_table_changes: Changes to the `sliding_sync_membership_snapshots` and `sliding_sync_joined_rooms` tables derived from the given `delta_state` (see @@ -1494,7 +1496,8 @@ def _update_current_state_txn( Args: txn room_id - delta_state: Changes to the current state of the room + delta_state: Deltas that are going to be used to update the + `current_state_events` table. Changes to the current state of the room. stream_id: TODO sliding_sync_table_changes: Changes to the `sliding_sync_membership_snapshots` and `sliding_sync_joined_rooms` tables From 339500d0673086879c257ccfc2060673961952c9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 21 Aug 2024 22:56:25 -0500 Subject: [PATCH 086/142] Fix sub-query selecting multiple rows --- synapse/storage/databases/main/events_bg_updates.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 253c324f9a3..6695a9578f1 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1998,7 +1998,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: {("," + ", ".join(insert_keys)) if insert_keys else ""}) VALUES ( ?, ?, ?, ?, ?, - (SELECT forgotten FROM room_memberships WHERE room_id = ? AND user_id = ?), + (SELECT forgotten FROM room_memberships WHERE event_id = ?), ? {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} ) @@ -2012,8 +2012,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: sender, membership_event_id, membership, - room_id, - user_id, + membership_event_id, membership_event_stream_ordering, ] + list(insert_values), From 9b8d2017af86d3fb74cee1b2495d9af8ea85326d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 11:34:40 -0500 Subject: [PATCH 087/142] Check `events_and_context` for state events See https://github.com/element-hq/synapse/pull/17512#discussion_r1726798803 --- synapse/storage/databases/main/events.py | 32 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 61a97477f88..209573928da 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -468,14 +468,30 @@ async def _calculate_sliding_sync_table_changes( if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: current_state_ids_map[state_key] = event_id - fetched_events = await self.store.get_events( - current_state_ids_map.values() - ) - - current_state_map: StateMap[EventBase] = { - state_key: fetched_events[event_id] - for state_key, event_id in current_state_ids_map.items() - } + current_state_map: MutableStateMap[EventBase] = {} + # In normal event persist scenarios, we probably won't be able to find + # these state events in `events_and_contexts` since we don't generally + # batch up local membership changes with other events, but it can + # happen. 
+ missing_state_event_ids: Set[str] = set() + for state_key, event_id in current_state_ids_map.items(): + event = event_map.get(event_id) + if event: + current_state_map[state_key] = event + else: + missing_state_event_ids.add(event_id) + + # Otherwise, we need to find a couple events + if missing_state_event_ids: + remaining_events = await self.store.get_events( + missing_state_event_ids + ) + # There shouldn't be any missing events + assert ( + remaining_events.keys() == missing_state_event_ids + ), missing_state_event_ids.difference(remaining_events.keys()) + for event in remaining_events.values(): + current_state_map[(event.type, event.state_key)] = event if current_state_map: state_insert_values = PersistEventsStore._get_sliding_sync_insert_values_from_state_map( From fc73b6ffc90067414530bb5c7a5d7bc8af3e8870 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 11:44:57 -0500 Subject: [PATCH 088/142] Rename `insert_key`/`insert_value` See https://github.com/element-hq/synapse/pull/17512#discussion_r1726805894 --- synapse/storage/databases/main/events.py | 36 +++++++++++++----------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 209573928da..74d2a8b0c53 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1646,10 +1646,14 @@ def _update_current_state_txn( # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # # Pulling keys/values separately is safe and will produce congruent lists - insert_keys = sliding_sync_table_changes.joined_room_updates.keys() - insert_values = sliding_sync_table_changes.joined_room_updates.values() + sliding_sync_updates_keys = ( + sliding_sync_table_changes.joined_room_updates.keys() + ) + sliding_sync_updates_values = ( + sliding_sync_table_changes.joined_room_updates.values() + ) # We only need to update when one of the relevant state values has changed - if insert_keys: + if sliding_sync_updates_keys: # If we have some `to_insert` values, we can use the standard upsert # pattern because we have access to an `event_id` to use for the # `event_stream_ordering` which has a `NON NULL` constraint. @@ -1676,7 +1680,7 @@ def _update_current_state_txn( next(iter(to_insert.values())), ] - args.extend(iter(insert_values)) + args.extend(iter(sliding_sync_updates_values)) # We don't update `event_stream_ordering` `ON CONFLICT` because it's # simpler and we can just rely on @@ -1687,15 +1691,15 @@ def _update_current_state_txn( txn.execute( f""" INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {", ".join(insert_keys)}) + (room_id, event_stream_ordering, {", ".join(sliding_sync_updates_keys)}) VALUES ( ?, (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in insert_values)} + {", ".join("?" for _ in sliding_sync_updates_values)} ) ON CONFLICT (room_id) DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} + {", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_updates_keys)} """, args, ) @@ -1704,12 +1708,12 @@ def _update_current_state_txn( # instead because there is no `event_id` to use for the `NON NULL` # constraint on `event_stream_ordering`. elif to_delete: - args = list(insert_values) + [room_id] + args = list(sliding_sync_updates_values) + [room_id] txn.execute( f""" UPDATE sliding_sync_joined_rooms SET - {", ".join(f"{key} = ?" for key in insert_keys)} + {", ".join(f"{key} = ?" 
for key in sliding_sync_updates_keys)} WHERE room_id = ? """, args, @@ -1772,23 +1776,23 @@ def _update_current_state_txn( # # Pulling keys/values separately is safe and will produce congruent # lists - insert_keys = ( + sliding_sync_snapshot_keys = ( sliding_sync_table_changes.membership_snapshot_shared_insert_values.keys() ) - insert_values = ( + sliding_sync_snapshot_values = ( sliding_sync_table_changes.membership_snapshot_shared_insert_values.values() ) - # We need to insert/update regardless of whether we have `insert_keys` + # We need to insert/update regardless of whether we have `sliding_sync_snapshot_keys` # because there are other fields in the `ON CONFLICT` upsert to run (see # inherit case above for more context when this happens). txn.execute_batch( f""" INSERT INTO sliding_sync_membership_snapshots (room_id, user_id, sender, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) + {("," + ", ".join(sliding_sync_snapshot_keys)) if sliding_sync_snapshot_keys else ""}) VALUES ( ?, ?, ?, ?, ?, ? - {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} + {("," + ", ".join("?" for _ in sliding_sync_snapshot_values)) if sliding_sync_snapshot_values else ""} ) ON CONFLICT (room_id, user_id) DO UPDATE SET @@ -1796,7 +1800,7 @@ def _update_current_state_txn( membership_event_id = EXCLUDED.membership_event_id, membership = EXCLUDED.membership, event_stream_ordering = EXCLUDED.event_stream_ordering - {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)) if insert_keys else ""} + {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_snapshot_keys)) if sliding_sync_snapshot_keys else ""} """, [ [ @@ -1807,7 +1811,7 @@ def _update_current_state_txn( membership_info.membership, membership_info.membership_event_stream_ordering, ] - + list(insert_values) + + list(sliding_sync_snapshot_values) for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots ], ) From 980ee9aad63a2c3f3ebb141135a7cc1e202e6ce3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 15:40:08 -0500 Subject: [PATCH 089/142] Prefer `simple_update_txn` See https://github.com/element-hq/synapse/pull/17512#discussion_r1726808112 --- synapse/storage/database.py | 5 +++-- synapse/storage/databases/main/events.py | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 569f6181939..66a7238debb 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -35,6 +35,7 @@ Iterable, Iterator, List, + Mapping, Optional, Sequence, Tuple, @@ -1966,8 +1967,8 @@ async def simple_update( def simple_update_txn( txn: LoggingTransaction, table: str, - keyvalues: Dict[str, Any], - updatevalues: Dict[str, Any], + keyvalues: Mapping[str, Any], + updatevalues: Mapping[str, Any], ) -> int: """ Update rows in the given database table. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 74d2a8b0c53..c59c9b463c9 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1708,16 +1708,18 @@ def _update_current_state_txn( # instead because there is no `event_id` to use for the `NON NULL` # constraint on `event_stream_ordering`. elif to_delete: - args = list(sliding_sync_updates_values) + [room_id] - txn.execute( - f""" - UPDATE sliding_sync_joined_rooms - SET - {", ".join(f"{key} = ?" 
for key in sliding_sync_updates_keys)} - WHERE room_id = ? - """, - args, + num_rows_updated = self.db_pool.simple_update_txn( + txn, + table="sliding_sync_joined_rooms", + keyvalues={ + "room_id": room_id, + }, + updatevalues=sliding_sync_table_changes.joined_room_updates, ) + # TODO: Is this assumption correct? + assert ( + num_rows_updated > 0 + ), "Expected to only run this against existing rows" # We now update `local_current_membership`. We do this regardless # of whether we're still in the room or not to handle the case where From 6723824c4af36587e6427a8b45cde9a61f896c99 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 15:47:44 -0500 Subject: [PATCH 090/142] Prefer `simple_delete_many_txn` See https://github.com/element-hq/synapse/pull/17512#discussion_r1726819380 --- synapse/storage/databases/main/events.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c59c9b463c9..88629b97a6c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1761,13 +1761,12 @@ def _update_current_state_txn( # # This would only happen if someone was state reset out of the room if sliding_sync_table_changes.to_delete_membership_snapshots: - txn.execute_batch( - "DELETE FROM sliding_sync_membership_snapshots" - " WHERE room_id = ? AND user_id = ?", - [ - (room_id, user_id) - for user_id in sliding_sync_table_changes.to_delete_membership_snapshots - ], + self.db_pool.simple_delete_many_txn( + txn, + table="sliding_sync_membership_snapshots", + column="user_id", + values=sliding_sync_table_changes.to_delete_membership_snapshots, + keyvalues={"room_id": room_id}, ) # We do this regardless of whether the server is `no_longer_in_room` or not From d61aada8badc15988cdb482309d08c1777fee659 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 16:35:59 -0500 Subject: [PATCH 091/142] Simplify `_update_sliding_sync_tables_with_new_persisted_events_txn()` See https://github.com/element-hq/synapse/pull/17512#discussion_r1719997640 https://github.com/element-hq/synapse/pull/17512#discussion_r1726828894 https://github.com/element-hq/synapse/pull/17512#discussion_r1726836440 --- synapse/storage/databases/main/events.py | 93 ++++++++++++++---------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 88629b97a6c..f89d4bdb970 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -932,8 +932,9 @@ def _persist_events_txn( sliding_sync_table_changes, ) + # We only update the sliding sync tables for non-backfilled events. self._update_sliding_sync_tables_with_new_persisted_events_txn( - txn, events_and_contexts + txn, room_id, events_and_contexts ) def _persist_event_auth_chain_txn( @@ -2048,6 +2049,7 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn( def _update_sliding_sync_tables_with_new_persisted_events_txn( self, txn: LoggingTransaction, + room_id: str, events_and_contexts: List[Tuple[EventBase, EventContext]], ) -> None: """ @@ -2060,42 +2062,57 @@ def _update_sliding_sync_tables_with_new_persisted_events_txn( Args: txn - events_and_contexts: The events being persisted + room_id: The room that all of the events belong to + events_and_contexts: The events being persisted. 
We assume the list is + sorted ascending by `stream_ordering`. We don't care about the sort when the + events are backfilled (with negative `stream_ordering`). """ - # Handle updating the `sliding_sync_joined_rooms` table. - room_id_to_stream_ordering_map: Dict[str, int] = {} - room_id_to_bump_stamp_map: Dict[str, int] = {} - for event, _ in events_and_contexts: - existing_stream_ordering = room_id_to_stream_ordering_map.get(event.room_id) - # This should exist for persisted events - assert event.internal_metadata.stream_ordering is not None + # Nothing to do if there are no events + if len(events_and_contexts) == 0: + return - # Ignore backfilled events which will have a negative stream ordering - if event.internal_metadata.stream_ordering < 0: - continue + # We only update the sliding sync tables for non-backfilled events. + # + # Check if the first event is a backfilled event (with a negative + # `stream_ordering`). If one event is backfilled, we assume this whole batch was + # backfilled. + first_event_stream_ordering = events_and_contexts[0][ + 0 + ].internal_metadata.stream_ordering + # This should exist for persisted events + assert first_event_stream_ordering is not None + if first_event_stream_ordering < 0: + return - if ( - existing_stream_ordering is None - or existing_stream_ordering < event.internal_metadata.stream_ordering - ): - room_id_to_stream_ordering_map[event.room_id] = ( - event.internal_metadata.stream_ordering - ) + # Since the list is sorted ascending by `stream_ordering`, the last event should + # have the highest `stream_ordering`. + max_stream_ordering = events_and_contexts[-1][ + 0 + ].internal_metadata.stream_ordering + max_bump_stamp = None + for event, _ in reversed(events_and_contexts): + # Sanity check that all events belong to the same room + assert event.room_id == room_id if event.type in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES: - existing_bump_stamp = room_id_to_bump_stamp_map.get(event.room_id) - # This should exist at this point because we're inserting events here which require it + # This should exist for persisted events assert event.internal_metadata.stream_ordering is not None - if ( - existing_bump_stamp is None - or existing_bump_stamp < event.internal_metadata.stream_ordering - ): - room_id_to_bump_stamp_map[event.room_id] = ( - event.internal_metadata.stream_ordering - ) - txn.execute_batch( + max_bump_stamp = event.internal_metadata.stream_ordering + + # Since we're iterating in reverse, we can break as soon as we find a + # matching bump event which should have the highest `stream_ordering`. + break + + # We should have exited earlier if there were no events + assert ( + max_stream_ordering is not None + ), "Expected to have a stream_ordering if we have events" + + # Handle updating the `sliding_sync_joined_rooms` table. + # + txn.execute( """ UPDATE sliding_sync_joined_rooms SET @@ -2111,17 +2128,15 @@ def _update_sliding_sync_tables_with_new_persisted_events_txn( END WHERE room_id = ? 
""", - [ - [ - room_id_to_stream_ordering_map[room_id], - room_id_to_stream_ordering_map[room_id], - room_id_to_bump_stamp_map.get(room_id), - room_id_to_bump_stamp_map.get(room_id), - room_id, - ] - for room_id in room_id_to_stream_ordering_map.keys() - ], + ( + max_stream_ordering, + max_stream_ordering, + max_bump_stamp, + max_bump_stamp, + room_id, + ), ) + # This may or may not update any rows depending if we are `no_longer_in_room` def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state From 4d87fa61c66ae5196cf813171c207fbd7ff71ffc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 16:44:02 -0500 Subject: [PATCH 092/142] "backfill" -> "bg_update" See https://github.com/element-hq/synapse/pull/17512#discussion_r1726837698 --- .../databases/main/events_bg_updates.py | 42 +++++++++---------- .../delta/87/01_sliding_sync_memberships.sql | 4 +- tests/storage/test_events.py | 18 ++++---- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 6695a9578f1..c4938f6fc35 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -88,9 +88,9 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" - SLIDING_SYNC_JOINED_ROOMS_BACKFILL = "sliding_sync_joined_rooms_backfill" - SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL = ( - "sliding_sync_membership_snapshots_backfill" + SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE = "sliding_sync_joined_rooms_bg_update" + SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE = ( + "sliding_sync_membership_snapshots_bg_update" ) @@ -296,12 +296,12 @@ def __init__( # Backfill the sliding sync tables self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, - self._sliding_sync_joined_rooms_backfill, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, + self._sliding_sync_joined_rooms_bg_update, ) self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, - self._sliding_sync_membership_snapshots_backfill, + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + self._sliding_sync_membership_snapshots_bg_update, ) async def _background_reindex_fields_sender( @@ -1542,7 +1542,7 @@ def _populate_txn(txn: LoggingTransaction) -> bool: return batch_size - async def _sliding_sync_joined_rooms_backfill( + async def _sliding_sync_joined_rooms_bg_update( self, progress: JsonDict, batch_size: int ) -> int: """ @@ -1567,13 +1567,13 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: return [row[0] for row in rooms_to_update_rows] rooms_to_update = await self.db_pool.runInteraction( - "_sliding_sync_joined_rooms_backfill._get_rooms_to_update_txn", + "_sliding_sync_joined_rooms_bg_update._get_rooms_to_update_txn", _get_rooms_to_update_txn, ) if not rooms_to_update: await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE ) return 0 @@ -1584,7 +1584,7 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: for room_id in rooms_to_update: current_state_ids_map, last_current_state_delta_stream_id = ( await self.db_pool.runInteraction( - 
"_sliding_sync_joined_rooms_backfill._get_relevant_sliding_sync_current_state_event_ids_txn", + "_sliding_sync_joined_rooms_bg_update._get_relevant_sliding_sync_current_state_event_ids_txn", PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn, room_id, ) @@ -1664,7 +1664,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: if last_successful_room_id is not None: self.db_pool.updates._background_update_progress_txn( txn, - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, {"last_room_id": room_id}, ) # Raising exception so we can just exit and try again. It would @@ -1710,18 +1710,18 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: last_successful_room_id = room_id await self.db_pool.runInteraction( - "sliding_sync_joined_rooms_backfill", _backfill_table_txn + "sliding_sync_joined_rooms_bg_update", _backfill_table_txn ) # Update the progress await self.db_pool.updates._background_update_progress( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, {"last_room_id": rooms_to_update[-1]}, ) return len(rooms_to_update) - async def _sliding_sync_membership_snapshots_backfill( + async def _sliding_sync_membership_snapshots_bg_update( self, progress: JsonDict, batch_size: int ) -> int: """ @@ -1761,13 +1761,13 @@ def _find_memberships_to_update_txn( return memberships_to_update_rows memberships_to_update_rows = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._find_memberships_to_update_txn", + "sliding_sync_membership_snapshots_bg_update._find_memberships_to_update_txn", _find_memberships_to_update_txn, ) if not memberships_to_update_rows: await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE ) return 0 @@ -1882,7 +1882,7 @@ def _find_previous_membership_txn( if membership == Membership.LEAVE and is_outlier: invite_or_knock_event_id, invite_or_knock_membership = ( await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._find_previous_membership", + "sliding_sync_membership_snapshots_bg_update._find_previous_membership", _find_previous_membership_txn, room_id, user_id, @@ -1906,7 +1906,7 @@ def _find_previous_membership_txn( raw_stripped_state_events = knock_room_state sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill._get_sliding_sync_insert_values_from_stripped_state_txn", + "sliding_sync_membership_snapshots_bg_update._get_sliding_sync_insert_values_from_stripped_state_txn", PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, raw_stripped_state_events, ) @@ -2019,7 +2019,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: ) await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_backfill", _backfill_table_txn + "sliding_sync_membership_snapshots_bg_update", _backfill_table_txn ) # Update the progress @@ -2033,7 +2033,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: _is_outlier, ) = memberships_to_update_rows[-1] await self.db_pool.updates._background_update_progress( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, {"last_event_stream_ordering": membership_event_stream_ordering}, ) diff --git 
a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 680f0ea8d16..8d7607c15f0 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -129,6 +129,6 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream -- Add some background updates to populate the new tables INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (8701, 'sliding_sync_joined_rooms_backfill', '{}'); + (8701, 'sliding_sync_joined_rooms_bg_update', '{}'); INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (8701, 'sliding_sync_membership_snapshots_backfill', '{}'); + (8701, 'sliding_sync_membership_snapshots_bg_update', '{}'); diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 2b8e93e3ed5..bd6c625441a 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2926,7 +2926,7 @@ def test_joined_background_update_missing(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", }, ) @@ -3073,7 +3073,7 @@ def test_joined_background_update_partial(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", }, ) @@ -3184,7 +3184,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -3381,7 +3381,7 @@ def test_membership_snapshots_background_update_local_invite(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -3599,7 +3599,7 @@ def test_membership_snapshots_background_update_remote_invite( self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -3833,7 +3833,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -4094,7 +4094,7 @@ def test_membership_snapshots_background_update_historical_state( self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -4248,7 +4248,7 @@ def test_membership_snapshots_background_update_forgotten_missing(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - 
"update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) @@ -4408,7 +4408,7 @@ def test_membership_snapshots_background_update_forgotten_partial(self) -> None: self.store.db_pool.simple_insert( "background_updates", { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BACKFILL, + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, "progress_json": "{}", }, ) From 693c06b2f15f7388e12016e827b156f699ea4966 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 16:48:02 -0500 Subject: [PATCH 093/142] Move away from backfill language --- .../storage/databases/main/events_bg_updates.py | 14 +++++++------- tests/storage/test_events.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index c4938f6fc35..ac3a000f887 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -294,7 +294,7 @@ def __init__( where_clause="NOT outlier", ) - # Backfill the sliding sync tables + # Add some background updates to populate the sliding sync tables self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, self._sliding_sync_joined_rooms_bg_update, @@ -1546,7 +1546,7 @@ async def _sliding_sync_joined_rooms_bg_update( self, progress: JsonDict, batch_size: int ) -> int: """ - Handles backfilling the `sliding_sync_joined_rooms` table. + Background update to populate the `sliding_sync_joined_rooms` table. """ last_room_id = progress.get("last_room_id", "") @@ -1631,7 +1631,7 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: last_current_state_delta_stream_id, ) - def _backfill_table_txn(txn: LoggingTransaction) -> None: + def _fill_table_txn(txn: LoggingTransaction) -> None: # Handle updating the `sliding_sync_joined_rooms` table # last_successful_room_id: Optional[str] = None @@ -1710,7 +1710,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: last_successful_room_id = room_id await self.db_pool.runInteraction( - "sliding_sync_joined_rooms_bg_update", _backfill_table_txn + "sliding_sync_joined_rooms_bg_update", _fill_table_txn ) # Update the progress @@ -1725,7 +1725,7 @@ async def _sliding_sync_membership_snapshots_bg_update( self, progress: JsonDict, batch_size: int ) -> int: """ - Handles backfilling the `sliding_sync_membership_snapshots` table. + Background update to populate the `sliding_sync_membership_snapshots` table. 
""" last_event_stream_ordering = progress.get( "last_event_stream_ordering", -(1 << 31) @@ -1967,7 +1967,7 @@ def _find_previous_membership_txn( membership_event_stream_ordering=membership_event_stream_ordering, ) - def _backfill_table_txn(txn: LoggingTransaction) -> None: + def _fill_table_txn(txn: LoggingTransaction) -> None: # Handle updating the `sliding_sync_membership_snapshots` table # for key, insert_map in to_insert_membership_snapshots.items(): @@ -2019,7 +2019,7 @@ def _backfill_table_txn(txn: LoggingTransaction) -> None: ) await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_bg_update", _backfill_table_txn + "sliding_sync_membership_snapshots_bg_update", _fill_table_txn ) # Update the progress diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index bd6c625441a..25710304a24 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -2860,7 +2860,7 @@ def test_non_join_state_reset(self) -> None: def test_joined_background_update_missing(self) -> None: """ - Test that the background update for `sliding_sync_joined_rooms` backfills missing rows + Test that the background update for `sliding_sync_joined_rooms` populates missing rows """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -3004,7 +3004,7 @@ def test_joined_background_update_missing(self) -> None: def test_joined_background_update_partial(self) -> None: """ - Test that the background update for `sliding_sync_joined_rooms` backfills + Test that the background update for `sliding_sync_joined_rooms` populates partially updated rows. """ user1_id = self.register_user("user1", "pass") @@ -3109,7 +3109,7 @@ def test_joined_background_update_partial(self) -> None: def test_membership_snapshots_background_update_joined(self) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` - backfills missing rows for join memberships. + populates missing rows for join memberships. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -3274,7 +3274,7 @@ def test_membership_snapshots_background_update_joined(self) -> None: def test_membership_snapshots_background_update_local_invite(self) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` - backfills missing rows for invite memberships. + populates missing rows for invite memberships. """ user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -3479,7 +3479,7 @@ def test_membership_snapshots_background_update_remote_invite( ) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` - backfills missing rows for remote invites (out-of-band memberships). + populates missing rows for remote invites (out-of-band memberships). """ user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -3696,7 +3696,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret ) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` - backfills missing rows for remote invite rejections/retractions (out-of-band memberships). + populates missing rows for remote invite rejections/retractions (out-of-band memberships). 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -3947,7 +3947,7 @@ def test_membership_snapshots_background_update_historical_state( ) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` - backfills missing rows for leave memberships. + populates missing rows for leave memberships. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") From bcba8cccfe7f9f3ce1daba803fb4b44242d95aac Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 16:52:31 -0500 Subject: [PATCH 094/142] No need for transaction See https://github.com/element-hq/synapse/pull/17512#discussion_r1726844107 --- synapse/storage/databases/main/events.py | 17 +++++++++-------- .../storage/databases/main/events_bg_updates.py | 6 ++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f89d4bdb970..a4a573dd1ac 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1951,12 +1951,13 @@ def _get_sliding_sync_insert_values_from_state_map( return sliding_sync_insert_map @classmethod - def _get_sliding_sync_insert_values_from_stripped_state_txn( - cls, txn: LoggingTransaction, unsigned_stripped_state_events: Any + def _get_sliding_sync_insert_values_from_stripped_state( + cls, unsigned_stripped_state_events: Any ) -> SlidingSyncMembershipSnapshotSharedInsertValues: """ - Pull out the relevant state values from the stripped state needed to insert into - the `sliding_sync_membership_snapshots` tables. + Pull out the relevant state values from the stripped state on an invite or knock + membership event needed to insert into the `sliding_sync_membership_snapshots` + tables. 
Returns: Map from column names (`room_type`, `is_encrypted`, `room_name`) to relevant @@ -2032,13 +2033,13 @@ def _get_sliding_sync_insert_values_from_stripped_state_txn( ) else: - # No strip state provided + # No stripped state provided sliding_sync_insert_map["has_known_state"] = False sliding_sync_insert_map["room_type"] = None sliding_sync_insert_map["room_name"] = None sliding_sync_insert_map["is_encrypted"] = False else: - # No strip state provided + # No stripped state provided sliding_sync_insert_map["has_known_state"] = False sliding_sync_insert_map["room_type"] = None sliding_sync_insert_map["room_name"] = None @@ -2818,8 +2819,8 @@ def _store_room_members_txn( pass elif event.membership in (Membership.INVITE, Membership.KNOCK): extra_insert_values = ( - self._get_sliding_sync_insert_values_from_stripped_state_txn( - txn, raw_stripped_state_events + self._get_sliding_sync_insert_values_from_stripped_state( + raw_stripped_state_events ) ) insert_values.update(extra_insert_values) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index ac3a000f887..c3ee2952d60 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1905,10 +1905,8 @@ def _find_previous_membership_txn( ) raw_stripped_state_events = knock_room_state - sliding_sync_membership_snapshots_insert_map = await self.db_pool.runInteraction( - "sliding_sync_membership_snapshots_bg_update._get_sliding_sync_insert_values_from_stripped_state_txn", - PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state_txn, - raw_stripped_state_events, + sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state( + raw_stripped_state_events ) # We should have some insert values for each room, even if no From 44432e211823594d55780a758d5904e7973308ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 16:56:09 -0500 Subject: [PATCH 095/142] Move tests to dedicated file See https://github.com/element-hq/synapse/pull/17512#discussion_r1726849798 --- tests/storage/test_events.py | 4129 +------------------- tests/storage/test_sliding_sync_tables.py | 4159 +++++++++++++++++++++ 2 files changed, 4162 insertions(+), 4126 deletions(-) create mode 100644 tests/storage/test_sliding_sync_tables.py diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 25710304a24..7cc1367f94d 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -20,27 +20,21 @@ # import logging -from typing import Dict, List, Optional, Tuple, cast +from typing import List, Optional -import attr -from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes +from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions -from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict -from synapse.events.snapshot import EventContext +from synapse.events import EventBase from synapse.federation.federation_base import event_from_pdu_json from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.storage.databases.main.events import DeltaState -from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.types import StateMap from 
synapse.util import Clock -from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -491,4120 +485,3 @@ def test_room_remote_user_cache_invalidated(self) -> None: users = self.get_success(self.store.get_users_in_room(room_id)) self.assertEqual(users, []) - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _SlidingSyncJoinedRoomResult: - room_id: str - # `event_stream_ordering` is only optional to allow easier semantics when we make - # expected objects from `event.internal_metadata.stream_ordering`. in the tests. - # `event.internal_metadata.stream_ordering` is marked optional because it only - # exists for persisted events but in the context of these tests, we're only working - # with persisted events and we're making comparisons so we will find any mismatch. - event_stream_ordering: Optional[int] - bump_stamp: Optional[int] - room_type: Optional[str] - room_name: Optional[str] - is_encrypted: bool - tombstone_successor_room_id: Optional[str] - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _SlidingSyncMembershipSnapshotResult: - room_id: str - user_id: str - sender: str - membership_event_id: str - membership: str - # `event_stream_ordering` is only optional to allow easier semantics when we make - # expected objects from `event.internal_metadata.stream_ordering`. in the tests. - # `event.internal_metadata.stream_ordering` is marked optional because it only - # exists for persisted events but in the context of these tests, we're only working - # with persisted events and we're making comparisons so we will find any mismatch. - event_stream_ordering: Optional[int] - has_known_state: bool - room_type: Optional[str] - room_name: Optional[str] - is_encrypted: bool - tombstone_successor_room_id: Optional[str] - # Make this default to "not forgotten" because it doesn't apply to many tests and we - # don't want to force all of the tests to deal with it. - forgotten: bool = False - - -class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): - """ - Tests to make sure the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` database tables are - populated correctly. - """ - - servlets = [ - admin.register_servlets, - login.register_servlets, - room.register_servlets, - ] - - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - self.store = hs.get_datastores().main - self.storage_controllers = hs.get_storage_controllers() - persist_events_store = self.hs.get_datastores().persist_events - assert persist_events_store is not None - self.persist_events_store = persist_events_store - - def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResult]: - """ - Return the rows from the `sliding_sync_joined_rooms` table. - - Returns: - Mapping from room_id to _SlidingSyncJoinedRoomResult. 
- """ - rows = cast( - List[Tuple[str, int, int, str, str, bool, str]], - self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_joined_rooms", - None, - retcols=( - "room_id", - "event_stream_ordering", - "bump_stamp", - "room_type", - "room_name", - "is_encrypted", - "tombstone_successor_room_id", - ), - ), - ), - ) - - return { - row[0]: _SlidingSyncJoinedRoomResult( - room_id=row[0], - event_stream_ordering=row[1], - bump_stamp=row[2], - room_type=row[3], - room_name=row[4], - is_encrypted=bool(row[5]), - tombstone_successor_room_id=row[6], - ) - for row in rows - } - - def _get_sliding_sync_membership_snapshots( - self, - ) -> Dict[Tuple[str, str], _SlidingSyncMembershipSnapshotResult]: - """ - Return the rows from the `sliding_sync_membership_snapshots` table. - - Returns: - Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. - """ - rows = cast( - List[Tuple[str, str, str, str, str, int, int, bool, str, str, bool, str]], - self.get_success( - self.store.db_pool.simple_select_list( - "sliding_sync_membership_snapshots", - None, - retcols=( - "room_id", - "user_id", - "sender", - "membership_event_id", - "membership", - "forgotten", - "event_stream_ordering", - "has_known_state", - "room_type", - "room_name", - "is_encrypted", - "tombstone_successor_room_id", - ), - ), - ), - ) - - return { - (row[0], row[1]): _SlidingSyncMembershipSnapshotResult( - room_id=row[0], - user_id=row[1], - sender=row[2], - membership_event_id=row[3], - membership=row[4], - forgotten=bool(row[5]), - event_stream_ordering=row[6], - has_known_state=bool(row[7]), - room_type=row[8], - room_name=row[9], - is_encrypted=bool(row[10]), - tombstone_successor_room_id=row[11], - ) - for row in rows - } - - _remote_invite_count: int = 0 - - def _create_remote_invite_room_for_user( - self, - invitee_user_id: str, - unsigned_invite_room_state: Optional[List[StrippedStateEvent]], - ) -> Tuple[str, EventBase]: - """ - Create a fake invite for a remote room and persist it. - - We don't have any state for these kind of rooms and can only rely on the - stripped state included in the unsigned portion of the invite event to identify - the room. - - Args: - invitee_user_id: The person being invited - unsigned_invite_room_state: List of stripped state events to assist the - receiver in identifying the room. - - Returns: - The room ID of the remote invite room and the persisted remote invite event. 
- """ - invite_room_id = f"!test_room{self._remote_invite_count}:remote_server" - - invite_event_dict = { - "room_id": invite_room_id, - "sender": "@inviter:remote_server", - "state_key": invitee_user_id, - "depth": 1, - "origin_server_ts": 1, - "type": EventTypes.Member, - "content": {"membership": Membership.INVITE}, - "auth_events": [], - "prev_events": [], - } - if unsigned_invite_room_state is not None: - serialized_stripped_state_events = [] - for stripped_event in unsigned_invite_room_state: - serialized_stripped_state_events.append( - { - "type": stripped_event.type, - "state_key": stripped_event.state_key, - "sender": stripped_event.sender, - "content": stripped_event.content, - } - ) - - invite_event_dict["unsigned"] = { - "invite_room_state": serialized_stripped_state_events - } - - invite_event = make_event_from_dict( - invite_event_dict, - room_version=RoomVersions.V10, - ) - invite_event.internal_metadata.outlier = True - invite_event.internal_metadata.out_of_band_membership = True - - self.get_success( - self.store.maybe_store_room_on_outlier_membership( - room_id=invite_room_id, room_version=invite_event.room_version - ) - ) - context = EventContext.for_outlier(self.hs.get_storage_controllers()) - persist_controller = self.hs.get_storage_controllers().persistence - assert persist_controller is not None - persisted_event, _, _ = self.get_success( - persist_controller.persist_event(invite_event, context) - ) - - self._remote_invite_count += 1 - - return invite_room_id, persisted_event - - def _retract_remote_invite_for_user( - self, - user_id: str, - remote_room_id: str, - ) -> EventBase: - """ - Create a fake invite retraction for a remote room and persist it. - - Retracting an invite just means the person is no longer invited to the room. - This is done by someone with proper power levels kicking the user from the room. - A kick shows up as a leave event for a given person with a different `sender`. - - Args: - user_id: The person who was invited and we're going to retract the - invite for. - remote_room_id: The room ID that the invite was for. - - Returns: - The persisted leave (kick) event. - """ - - kick_event_dict = { - "room_id": remote_room_id, - "sender": "@inviter:remote_server", - "state_key": user_id, - "depth": 1, - "origin_server_ts": 1, - "type": EventTypes.Member, - "content": {"membership": Membership.LEAVE}, - "auth_events": [], - "prev_events": [], - } - - kick_event = make_event_from_dict( - kick_event_dict, - room_version=RoomVersions.V10, - ) - kick_event.internal_metadata.outlier = True - kick_event.internal_metadata.out_of_band_membership = True - - self.get_success( - self.store.maybe_store_room_on_outlier_membership( - room_id=remote_room_id, room_version=kick_event.room_version - ) - ) - context = EventContext.for_outlier(self.hs.get_storage_controllers()) - persist_controller = self.hs.get_storage_controllers().persistence - assert persist_controller is not None - persisted_event, _, _ = self.get_success( - persist_controller.persist_event(kick_event, context) - ) - - return persisted_event - - def test_joined_room_with_no_info(self) -> None: - """ - Test joined room that doesn't have a room type, encryption, or name shows up in - `sliding_sync_joined_rooms`. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # History visibility just happens to be the last event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomHistoryVisibility, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_joined_room_with_info(self) -> None: - """ - Test joined encrypted room with name shows up in `sliding_sync_joined_rooms`. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id1, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id1, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user2_tok, - ) - # Add a tombstone - self.helper.send_state( - room_id1, - EventTypes.Tombstone, - {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, - tok=user2_tok, - ) - - # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # This should be whatever is the last event in the room - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - # Even though this room does have a name, is encrypted, and has a - # tombstone, user2 is the room creator and joined at the room creation - # time which didn't have this state set yet. - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_joined_space_room_with_info(self) -> None: - """ - Test joined space room with name shows up in `sliding_sync_joined_rooms`. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - space_room_id = self.helper.create_room_as( - user2_id, - tok=user2_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user2_tok, - ) - - # User1 joins the room - user1_join_response = self.helper.join(space_room_id, user1_id, tok=user1_tok) - user1_join_event_pos = self.get_success( - self.store.get_position_for_event(user1_join_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(space_room_id) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {space_room_id}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[space_room_id], - _SlidingSyncJoinedRoomResult( - room_id=space_room_id, - event_stream_ordering=user1_join_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (space_room_id, user1_id), - (space_room_id, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - # Even though this room does have a name, user2 is the room creator and - # joined at the room creation time which didn't have this state set yet. - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_joined_room_with_state_updated(self) -> None: - """ - Test state derived info in `sliding_sync_joined_rooms` is updated when the - current state is updated. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id1, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - - # User1 joins the room - user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) - user1_join_event_pos = self.get_success( - self.store.get_position_for_event(user1_join_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - event_stream_ordering=user1_join_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - - # Update the room name - self.helper.send_state( - room_id1, - EventTypes.Name, - {"name": "my super duper room was renamed"}, - tok=user2_tok, - ) - # Encrypt the room - encrypt_room_response = self.helper.send_state( - room_id1, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user2_tok, - ) - encrypt_room_event_pos = self.get_success( - self.store.get_position_for_event(encrypt_room_response["event_id"]) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - # Make sure we see the new room name - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - event_stream_ordering=encrypt_room_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room was renamed", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the user 
joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_joined_room_is_bumped(self) -> None: - """ - Test that `event_stream_ordering` and `bump_stamp` is updated when a new bump - event is sent (`sliding_sync_joined_rooms`). - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id1, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - - # User1 joins the room - user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) - user1_join_event_pos = self.get_success( - self.store.get_position_for_event(user1_join_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - event_stream_ordering=user1_join_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - user1_snapshot, - ) - # Holds the info according to the current state when the user joined - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - user2_snapshot, - ) - - # Send a new message to 
bump the room - event_response = self.helper.send(room_id1, "some message", tok=user1_tok) - event_pos = self.get_success( - self.store.get_position_for_event(event_response["event_id"]) - ) - - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - # Make sure we see the new room name - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # Updated `event_stream_ordering` - event_stream_ordering=event_pos.stream, - # And since the event was a bump event, the `bump_stamp` should be updated - bump_stamp=event_pos.stream, - # The state is still the same (it didn't change) - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - user1_snapshot, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - user2_snapshot, - ) - - def test_joined_room_meta_state_reset(self) -> None: - """ - Test that a state reset on the room name is reflected in the - `sliding_sync_joined_rooms` table. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - - # Make sure we see the new room name - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id}, - exact=True, - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id], - _SlidingSyncJoinedRoomResult( - room_id=room_id, - # This should be whatever is the last event in the room - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - 
sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - # Holds the info according to the current state when the user joined (no room - # name when the room creator joined) - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - # Mock a state reset removing the room name state from the current state - message_tuple = self.get_success( - create_event( - self.hs, - prev_event_ids=[state_map[(EventTypes.Name, "")].event_id], - auth_event_ids=[ - state_map[(EventTypes.Create, "")].event_id, - state_map[(EventTypes.Member, user1_id)].event_id, - ], - type=EventTypes.Message, - content={"body": "foo", "msgtype": "m.text"}, - sender=user1_id, - room_id=room_id, - room_version=RoomVersions.V10.identifier, - ) - ) - event_chunk = [message_tuple] - self.get_success( - self.persist_events_store._persist_events_and_state_updates( - room_id, - event_chunk, - state_delta_for_room=DeltaState( - # This is the state reset part. We're removing the room name state. - to_delete=[(EventTypes.Name, "")], - to_insert={}, - ), - new_forward_extremities={message_tuple[0].event_id}, - use_negative_stream_ordering=False, - inhibit_local_membership_updates=False, - new_event_links={}, - ) - ) - - # Make sure the state reset is reflected in the `sliding_sync_joined_rooms` table - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id}, - exact=True, - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id], - _SlidingSyncJoinedRoomResult( - room_id=room_id, - # This should be whatever is the last event in the room - event_stream_ordering=message_tuple[ - 0 - ].internal_metadata.stream_ordering, - bump_stamp=message_tuple[0].internal_metadata.stream_ordering, - room_type=None, - # This was state reset back to None - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # State reset shouldn't be reflected in the `sliding_sync_membership_snapshots` - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Snapshots haven't changed - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - def test_non_join_space_room_with_info(self) -> None: - """ - Test users who was invited shows up in `sliding_sync_membership_snapshots`. 
- """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - space_room_id = self.helper.create_room_as( - user2_id, - tok=user2_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user2_tok, - ) - # Encrypt the room - self.helper.send_state( - space_room_id, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user2_tok, - ) - # Add a tombstone - self.helper.send_state( - space_room_id, - EventTypes.Tombstone, - {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, - tok=user2_tok, - ) - - # User1 is invited to the room - user1_invited_response = self.helper.invite( - space_room_id, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_invited_event_pos = self.get_success( - self.store.get_position_for_event(user1_invited_response["event_id"]) - ) - - # Update the room name after we are invited just to make sure - # we don't update non-join memberships when the room name changes. - rename_response = self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space was renamed"}, - tok=user2_tok, - ) - rename_event_pos = self.get_success( - self.store.get_position_for_event(rename_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(space_room_id) - ) - - # User2 is still joined to the room so we should still have an entry in the - # `sliding_sync_joined_rooms` table. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {space_room_id}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[space_room_id], - _SlidingSyncJoinedRoomResult( - room_id=space_room_id, - event_stream_ordering=rename_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=RoomTypes.SPACE, - room_name="my super duper space was renamed", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (space_room_id, user1_id), - (space_room_id, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user was invited - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invited_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=user1_invited_event_pos.stream, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - 
membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_invite_ban(self) -> None: - """ - Test users who have invite/ban membership in room shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - user3_id = self.register_user("user3", "pass") - user3_tok = self.login(user3_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 is invited to the room - user1_invited_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_invited_event_pos = self.get_success( - self.store.get_position_for_event(user1_invited_response["event_id"]) - ) - - # User3 joins the room - self.helper.join(room_id1, user3_id, tok=user3_tok) - # User3 is banned from the room - user3_ban_response = self.helper.ban( - room_id1, src=user2_id, targ=user3_id, tok=user2_tok - ) - user3_ban_event_pos = self.get_success( - self.store.get_position_for_event(user3_ban_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - # User2 is still joined to the room so we should still have an entry - # in the `sliding_sync_joined_rooms` table. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - event_stream_ordering=user3_ban_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - (room_id1, user3_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user was invited - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invited_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=user1_invited_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # 
Holds the info according to the current state when the user was banned - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user3_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user3_id, - sender=user2_id, - membership_event_id=user3_ban_response["event_id"], - membership=Membership.BAN, - event_stream_ordering=user3_ban_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_reject_invite_empty_room(self) -> None: - """ - In a room where no one is joined (`no_longer_in_room`), test rejecting an invite. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 is invited to the room - self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) - - # User2 leaves the room - user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) - user2_leave_event_pos = self.get_success( - self.store.get_position_for_event(user2_leave_response["event_id"]) - ) - - # User1 rejects the invite - user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) - user1_leave_event_pos = self.get_success( - self.store.get_position_for_event(user1_leave_response["event_id"]) - ) - - # No one is joined to the room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user left - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=user1_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user1_leave_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the left - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=user2_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user2_leave_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_changing(self) -> None: - """ - Test latest snapshot evolves when membership changes (`sliding_sync_membership_snapshots`). 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 is invited to the room - # ====================================================== - user1_invited_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_invited_event_pos = self.get_success( - self.store.get_position_for_event(user1_invited_response["event_id"]) - ) - - # Update the room name after the user was invited - room_name_update_response = self.helper.send_state( - room_id1, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - room_name_update_event_pos = self.get_success( - self.store.get_position_for_event(room_name_update_response["event_id"]) - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id1) - ) - - # Assert joined room status - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # Latest event in the room - event_stream_ordering=room_name_update_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # Assert membership snapshots - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user was invited - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invited_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=user1_invited_event_pos.stream, - has_known_state=True, - room_type=None, - # Room name was updated after the user was invited so we should still - # see it unset here - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the user joined - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - user2_snapshot, - ) - - # User1 joins the room - # ====================================================== - user1_joined_response = self.helper.join(room_id1, user1_id, tok=user1_tok) - user1_joined_event_pos = self.get_success( - self.store.get_position_for_event(user1_joined_response["event_id"]) - ) - - # Assert joined room status - sliding_sync_joined_rooms_results = 
self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # Latest event in the room - event_stream_ordering=user1_joined_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # Assert membership snapshots - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=user1_joined_response["event_id"], - membership=Membership.JOIN, - event_stream_ordering=user1_joined_event_pos.stream, - has_known_state=True, - room_type=None, - # We see the update state because the user joined after the room name - # change - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - user2_snapshot, - ) - - # User1 is banned from the room - # ====================================================== - user1_ban_response = self.helper.ban( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_ban_event_pos = self.get_success( - self.store.get_position_for_event(user1_ban_response["event_id"]) - ) - - # Assert joined room status - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id1}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id1], - _SlidingSyncJoinedRoomResult( - room_id=room_id1, - # Latest event in the room - event_stream_ordering=user1_ban_event_pos.stream, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # Assert membership snapshots - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user was banned - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_ban_response["event_id"], - membership=Membership.BAN, - event_stream_ordering=user1_ban_event_pos.stream, - has_known_state=True, - room_type=None, - # We see the update state because the user joined after the room name - # change - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - # Holds the info according to the current state 
when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - user2_snapshot, - ) - - def test_non_join_server_left_room(self) -> None: - """ - Test everyone local leaves the room but their leave membership still shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id1, user1_id, tok=user1_tok) - - # User2 leaves the room - user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) - user2_leave_event_pos = self.get_success( - self.store.get_position_for_event(user2_leave_response["event_id"]) - ) - - # User1 leaves the room - user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) - user1_leave_event_pos = self.get_success( - self.store.get_position_for_event(user1_leave_response["event_id"]) - ) - - # No one is joined to the room anymore so we shouldn't have an entry in the - # `sliding_sync_joined_rooms` table. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - # We should still see rows for the leave events (non-joins) - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id1, user1_id), - (room_id1, user2_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user1_id, - sender=user1_id, - membership_event_id=user1_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user1_leave_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id1, - user_id=user2_id, - sender=user2_id, - membership_event_id=user2_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user2_leave_event_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - @parameterized.expand( - [ - # No stripped state provided - ("none", None), - # Empty stripped state provided - ("empty", []), - ] - ) - def test_non_join_remote_invite_no_stripped_state( - self, _description: str, stripped_state: Optional[List[StrippedStateEvent]] - ) -> None: - """ - Test remote invite with no stripped state provided shows up in - `sliding_sync_membership_snapshots` with `has_known_state=False`. 
- """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room without any `unsigned.invite_room_state` - remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user(user1_id, stripped_state) - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=remote_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, - # No stripped state provided - has_known_state=False, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_remote_invite_unencrypted_room(self) -> None: - """ - Test remote invite with stripped state (unencrypted room) shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room with some `unsigned.invite_room_state` - # indicating that the room is encrypted. - remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper room", - }, - ), - ], - ) - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=remote_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_remote_invite_encrypted_room(self) -> None: - """ - Test remote invite with stripped state (encrypted room) shows up in - `sliding_sync_membership_snapshots`. 
- """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room with some `unsigned.invite_room_state` - # indicating that the room is encrypted. - remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - # This is not one of the stripped state events according to the state - # but we still handle it. - StrippedStateEvent( - type=EventTypes.Tombstone, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room", - }, - ), - # Also test a random event that we don't care about - StrippedStateEvent( - type="org.matrix.foo_state", - state_key="", - sender="@inviter:remote_server", - content={ - "foo": "qux", - }, - ), - ], - ) - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=remote_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - - def test_non_join_remote_invite_space_room(self) -> None: - """ - Test remote invite with stripped state (encrypted space room with name) shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room with some `unsigned.invite_room_state` - # indicating that the room is encrypted. 
- remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - # Specify that it is a space room - EventContentFields.ROOM_TYPE: RoomTypes.SPACE, - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper space", - }, - ), - ], - ) - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=remote_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_reject_remote_invite(self) -> None: - """ - Test rejected remote invite (user decided to leave the room) inherits meta data - from when the remote invite stripped state and shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room with some `unsigned.invite_room_state` - # indicating that the room is encrypted. 
- remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - ], - ) - ) - - # User1 decides to leave the room (reject the invite) - user1_leave_response = self.helper.leave( - remote_invite_room_id, user1_id, tok=user1_tok - ) - user1_leave_pos = self.get_success( - self.store.get_position_for_event(user1_leave_response["event_id"]) - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=user1_leave_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=user1_leave_pos.stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_retracted_remote_invite(self) -> None: - """ - Test retracted remote invite (Remote inviter kicks the person who was invited) - inherits meta data from when the remote invite stripped state and shows up in - `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create a remote invite room with some `unsigned.invite_room_state` - # indicating that the room is encrypted. - remote_invite_room_id, remote_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - ], - ) - ) - - # `@inviter:remote_server` decides to retract the invite (kicks the user). 
- # (Note: A kick is just a leave event with a different sender) - remote_invite_retraction_event = self._retract_remote_invite_for_user( - user_id=user1_id, - remote_room_id=remote_invite_room_id, - ) - - # No one local is joined to the remote room - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (remote_invite_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (remote_invite_room_id, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=remote_invite_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=remote_invite_retraction_event.event_id, - membership=Membership.LEAVE, - event_stream_ordering=remote_invite_retraction_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - - def test_non_join_state_reset(self) -> None: - """ - Test a state reset that removes someone from the room. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - - # Make sure we see the new room name - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id}, - exact=True, - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id], - _SlidingSyncJoinedRoomResult( - room_id=room_id, - # This should be whatever is the last event in the room - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - # Holds the info according to the current state when the user joined (no room - # name when the room creator joined) 
- user2_snapshot = _SlidingSyncMembershipSnapshotResult(
- room_id=room_id,
- user_id=user2_id,
- sender=user2_id,
- membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id,
- membership=Membership.JOIN,
- event_stream_ordering=state_map[
- (EventTypes.Member, user2_id)
- ].internal_metadata.stream_ordering,
- has_known_state=True,
- room_type=None,
- room_name=None,
- is_encrypted=False,
- tombstone_successor_room_id=None,
- )
- self.assertEqual(
- sliding_sync_membership_snapshots_results.get((room_id, user2_id)),
- user2_snapshot,
- )
-
- # Mock a state reset removing the membership for user1 in the current state
- message_tuple = self.get_success(
- create_event(
- self.hs,
- prev_event_ids=[state_map[(EventTypes.Name, "")].event_id],
- auth_event_ids=[
- state_map[(EventTypes.Create, "")].event_id,
- state_map[(EventTypes.Member, user1_id)].event_id,
- ],
- type=EventTypes.Message,
- content={"body": "foo", "msgtype": "m.text"},
- sender=user1_id,
- room_id=room_id,
- room_version=RoomVersions.V10.identifier,
- )
- )
- event_chunk = [message_tuple]
- self.get_success(
- self.persist_events_store._persist_events_and_state_updates(
- room_id,
- event_chunk,
- state_delta_for_room=DeltaState(
- # This is the state reset part. We're removing user1's membership from the current state.
- to_delete=[(EventTypes.Member, user1_id)],
- to_insert={},
- ),
- new_forward_extremities={message_tuple[0].event_id},
- use_negative_stream_ordering=False,
- inhibit_local_membership_updates=False,
- new_event_links={},
- )
- )
-
- # State reset on membership doesn't affect the `sliding_sync_joined_rooms` table
- sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
- self.assertIncludes(
- set(sliding_sync_joined_rooms_results.keys()),
- {room_id},
- exact=True,
- )
- state_map = self.get_success(
- self.storage_controllers.state.get_current_state(room_id)
- )
- self.assertEqual(
- sliding_sync_joined_rooms_results[room_id],
- _SlidingSyncJoinedRoomResult(
- room_id=room_id,
- # This should be whatever is the last event in the room
- event_stream_ordering=message_tuple[
- 0
- ].internal_metadata.stream_ordering,
- bump_stamp=message_tuple[0].internal_metadata.stream_ordering,
- room_type=None,
- room_name="my super duper room",
- is_encrypted=False,
- tombstone_successor_room_id=None,
- ),
- )
-
- # State reset on membership should remove the user's snapshot
- sliding_sync_membership_snapshots_results = (
- self._get_sliding_sync_membership_snapshots()
- )
- self.assertIncludes(
- set(sliding_sync_membership_snapshots_results.keys()),
- {
- # We shouldn't see user1 in the snapshots table anymore
- (room_id, user2_id),
- },
- exact=True,
- )
- # Snapshot for user2 hasn't changed
- self.assertEqual(
- sliding_sync_membership_snapshots_results.get((room_id, user2_id)),
- user2_snapshot,
- )
-
- def test_joined_background_update_missing(self) -> None:
- """
- Test that the background update for `sliding_sync_joined_rooms` populates missing rows
- """
- user1_id = self.register_user("user1", "pass")
- user1_tok = self.login(user1_id, "pass")
-
- # Create rooms with various levels of state that should appear in the table
- #
- room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok)
-
- room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok)
- # Add a room name
- self.helper.send_state(
- room_id_with_info,
- EventTypes.Name,
- {"name": "my super duper room"},
- tok=user1_tok,
- )
- # Encrypt the room
- self.helper.send_state(
- room_id_with_info,
-
EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user1_tok, - ) - - space_room_id = self.helper.create_room_as( - user1_id, - tok=user1_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user1_tok, - ) - - # Clean-up the `sliding_sync_joined_rooms` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_joined_rooms", - column="room_id", - iterable=(room_id_no_info, room_id_with_info, space_room_id), - keyvalues={}, - desc="sliding_sync_joined_rooms.test_joined_background_update_missing", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id_no_info, room_id_with_info, space_room_id}, - exact=True, - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_no_info) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_no_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_no_info, - # History visibility just happens to be the last event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomHistoryVisibility, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_with_info) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_with_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_with_info, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomEncryption, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(space_room_id) - ) - self.assertEqual( - sliding_sync_joined_rooms_results[space_room_id], - _SlidingSyncJoinedRoomResult( - room_id=space_room_id, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.Name, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_joined_background_update_partial(self) -> None: - """ - 
Test that the background update for `sliding_sync_joined_rooms` populates - partially updated rows. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) - # Add a room name - self.helper.send_state( - room_id_with_info, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user1_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id_with_info, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user1_tok, - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_with_info) - ) - - # Clean-up the `sliding_sync_joined_rooms` table as if the the encryption event - # never made it into the table. - self.get_success( - self.store.db_pool.simple_update( - table="sliding_sync_joined_rooms", - keyvalues={"room_id": room_id_with_info}, - updatevalues={"is_encrypted": False}, - desc="sliding_sync_joined_rooms.test_joined_background_update_partial", - ) - ) - - # We should see the partial row that we made in preparation for the test. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id_with_info}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_with_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_with_info, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomEncryption, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # Insert and run the background update. - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id_with_info}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_with_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_with_info, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomEncryption, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_joined(self) -> None: - """ - Test that the background update for `sliding_sync_membership_snapshots` - populates missing rows for join memberships. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok) - - room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) - # Add a room name - self.helper.send_state( - room_id_with_info, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user1_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id_with_info, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user1_tok, - ) - # Add a tombstone - self.helper.send_state( - room_id_with_info, - EventTypes.Tombstone, - {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, - tok=user1_tok, - ) - - space_room_id = self.helper.create_room_as( - user1_id, - tok=user1_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user1_tok, - ) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id_no_info, room_id_with_info, space_room_id), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_joined", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. 
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id_no_info, user1_id), - (room_id_with_info, user1_id), - (space_room_id, user1_id), - }, - exact=True, - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_no_info) - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_no_info, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_with_info) - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_with_info, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_with_info, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - state_map = self.get_success( - self.storage_controllers.state.get_current_state(space_room_id) - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_local_invite(self) -> None: - """ - Test that the background update for `sliding_sync_membership_snapshots` - populates missing rows for invite memberships. 
- """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) - - room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id_with_info, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id_with_info, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user2_tok, - ) - # Add a tombstone - self.helper.send_state( - room_id_with_info, - EventTypes.Tombstone, - {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, - tok=user2_tok, - ) - - space_room_id = self.helper.create_room_as( - user1_id, - tok=user2_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user2_tok, - ) - - # Invite user1 to the rooms - user1_invite_room_id_no_info_response = self.helper.invite( - room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_invite_room_id_with_info_response = self.helper.invite( - room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_invite_space_room_id_response = self.helper.invite( - space_room_id, src=user2_id, targ=user1_id, tok=user2_tok - ) - - # Have user2 leave the rooms to make sure that our background update is not just - # reading from `current_state_events`. For invite/knock memberships, we should - # be reading from the stripped state on the invite/knock event itself. - self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) - self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) - self.helper.leave(space_room_id, user2_id, tok=user2_tok) - # Check to make sure we actually don't have any `current_state_events` for the rooms - current_state_check_rows = self.get_success( - self.store.db_pool.simple_select_many_batch( - table="current_state_events", - column="room_id", - iterable=[room_id_no_info, room_id_with_info, space_room_id], - retcols=("event_id",), - keyvalues={}, - desc="check current_state_events in test", - ) - ) - self.assertEqual(len(current_state_check_rows), 0) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id_no_info, room_id_with_info, space_room_id), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_local_invite", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. 
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - # The invite memberships for user1 - (room_id_no_info, user1_id), - (room_id_with_info, user1_id), - (space_room_id, user1_id), - # The leave memberships for user2 - (room_id_no_info, user2_id), - (room_id_with_info, user2_id), - (space_room_id, user2_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_no_info, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invite_room_id_no_info_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_invite_room_id_no_info_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_with_info, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_with_info, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invite_room_id_with_info_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_invite_room_id_with_info_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - # The tombstone isn't showing here ("another_room") because it's not one - # of the stripped events that we hand out as part of the invite event. - # Even though we handle this scenario from other remote homservers, - # Synapse does not include the tombstone in the invite event. - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender=user2_id, - membership_event_id=user1_invite_space_room_id_response["event_id"], - membership=Membership.INVITE, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_invite_space_room_id_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_remote_invite( - self, - ) -> None: - """ - Test that the background update for `sliding_sync_membership_snapshots` - populates missing rows for remote invites (out-of-band memberships). 
- """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_unknown_state, room_id_unknown_state_invite_event = ( - self._create_remote_invite_room_for_user(user1_id, None) - ) - - room_id_no_info, room_id_no_info_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - ], - ) - ) - - room_id_with_info, room_id_with_info_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper room", - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - ], - ) - ) - - space_room_id, space_room_id_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - EventContentFields.ROOM_TYPE: RoomTypes.SPACE, - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper space", - }, - ), - ], - ) - ) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=( - room_id_unknown_state, - room_id_no_info, - room_id_with_info, - space_room_id, - ), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. 
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - # The invite memberships for user1 - (room_id_unknown_state, user1_id), - (room_id_no_info, user1_id), - (room_id_with_info, user1_id), - (space_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_unknown_state, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_unknown_state, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=room_id_unknown_state_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=room_id_unknown_state_invite_event.internal_metadata.stream_ordering, - has_known_state=False, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_no_info, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=room_id_no_info_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=room_id_no_info_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_with_info, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_with_info, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=room_id_with_info_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=room_id_with_info_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=space_room_id_invite_event.event_id, - membership=Membership.INVITE, - event_stream_ordering=space_room_id_invite_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_remote_invite_rejections_and_retractions( - self, - ) -> None: - """ - Test that the background update for `sliding_sync_membership_snapshots` - populates missing rows for remote invite rejections/retractions (out-of-band memberships). 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_unknown_state, room_id_unknown_state_invite_event = ( - self._create_remote_invite_room_for_user(user1_id, None) - ) - - room_id_no_info, room_id_no_info_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - ], - ) - ) - - room_id_with_info, room_id_with_info_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper room", - }, - ), - StrippedStateEvent( - type=EventTypes.RoomEncryption, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", - }, - ), - ], - ) - ) - - space_room_id, space_room_id_invite_event = ( - self._create_remote_invite_room_for_user( - user1_id, - [ - StrippedStateEvent( - type=EventTypes.Create, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_CREATOR: "@inviter:remote_server", - EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, - EventContentFields.ROOM_TYPE: RoomTypes.SPACE, - }, - ), - StrippedStateEvent( - type=EventTypes.Name, - state_key="", - sender="@inviter:remote_server", - content={ - EventContentFields.ROOM_NAME: "my super duper space", - }, - ), - ], - ) - ) - - # Reject the remote invites. - # Also try retracting a remote invite. - room_id_unknown_state_leave_event_response = self.helper.leave( - room_id_unknown_state, user1_id, tok=user1_tok - ) - room_id_no_info_leave_event = self._retract_remote_invite_for_user( - user_id=user1_id, - remote_room_id=room_id_no_info, - ) - room_id_with_info_leave_event_response = self.helper.leave( - room_id_with_info, user1_id, tok=user1_tok - ) - space_room_id_leave_event = self._retract_remote_invite_for_user( - user_id=user1_id, - remote_room_id=space_room_id, - ) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=( - room_id_unknown_state, - room_id_no_info, - room_id_with_info, - space_room_id, - ), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite_rejections_and_retractions", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. 
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - # The invite memberships for user1 - (room_id_unknown_state, user1_id), - (room_id_no_info, user1_id), - (room_id_with_info, user1_id), - (space_room_id, user1_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_unknown_state, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_unknown_state, - user_id=user1_id, - sender=user1_id, - membership_event_id=room_id_unknown_state_leave_event_response[ - "event_id" - ], - membership=Membership.LEAVE, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - room_id_unknown_state_leave_event_response["event_id"] - ) - ).stream, - has_known_state=False, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_no_info, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=room_id_no_info_leave_event.event_id, - membership=Membership.LEAVE, - event_stream_ordering=room_id_no_info_leave_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_with_info, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_with_info, - user_id=user1_id, - sender=user1_id, - membership_event_id=room_id_with_info_leave_event_response["event_id"], - membership=Membership.LEAVE, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - room_id_with_info_leave_event_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - sender="@inviter:remote_server", - membership_event_id=space_room_id_leave_event.event_id, - membership=Membership.LEAVE, - event_stream_ordering=space_room_id_leave_event.internal_metadata.stream_ordering, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - @parameterized.expand( - [ - # We'll do a kick for this - (Membership.LEAVE,), - (Membership.BAN,), - ] - ) - def test_membership_snapshots_background_update_historical_state( - self, test_membership: str - ) -> None: - """ - Test that the background update for `sliding_sync_membership_snapshots` - populates missing rows for leave memberships. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) - - room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) - # Add a room name - self.helper.send_state( - room_id_with_info, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user2_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id_with_info, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user2_tok, - ) - # Add a tombstone - self.helper.send_state( - room_id_with_info, - EventTypes.Tombstone, - {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, - tok=user2_tok, - ) - - space_room_id = self.helper.create_room_as( - user1_id, - tok=user2_tok, - extra_content={ - "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} - }, - ) - # Add a room name - self.helper.send_state( - space_room_id, - EventTypes.Name, - {"name": "my super duper space"}, - tok=user2_tok, - ) - - # Join the room in preparation for our test_membership - self.helper.join(room_id_no_info, user1_id, tok=user1_tok) - self.helper.join(room_id_with_info, user1_id, tok=user1_tok) - self.helper.join(space_room_id, user1_id, tok=user1_tok) - - if test_membership == Membership.LEAVE: - # Kick user1 from the rooms - user1_membership_room_id_no_info_response = self.helper.change_membership( - room=room_id_no_info, - src=user2_id, - targ=user1_id, - tok=user2_tok, - membership=Membership.LEAVE, - extra_data={ - "reason": "Bad manners", - }, - ) - user1_membership_room_id_with_info_response = self.helper.change_membership( - room=room_id_with_info, - src=user2_id, - targ=user1_id, - tok=user2_tok, - membership=Membership.LEAVE, - extra_data={ - "reason": "Bad manners", - }, - ) - user1_membership_space_room_id_response = self.helper.change_membership( - room=space_room_id, - src=user2_id, - targ=user1_id, - tok=user2_tok, - membership=Membership.LEAVE, - extra_data={ - "reason": "Bad manners", - }, - ) - elif test_membership == Membership.BAN: - # Ban user1 from the rooms - user1_membership_room_id_no_info_response = self.helper.ban( - room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_membership_room_id_with_info_response = self.helper.ban( - room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok - ) - user1_membership_space_room_id_response = self.helper.ban( - space_room_id, src=user2_id, targ=user1_id, tok=user2_tok - ) - else: - raise AssertionError("Unknown test_membership") - - # Have user2 leave the rooms to make sure that our background update is not just - # reading from `current_state_events`. For leave memberships, we should be - # reading from the historical state. 
- self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) - self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) - self.helper.leave(space_room_id, user2_id, tok=user2_tok) - # Check to make sure we actually don't have any `current_state_events` for the rooms - current_state_check_rows = self.get_success( - self.store.db_pool.simple_select_many_batch( - table="current_state_events", - column="room_id", - iterable=[room_id_no_info, room_id_with_info, space_room_id], - retcols=("event_id",), - keyvalues={}, - desc="check current_state_events in test", - ) - ) - self.assertEqual(len(current_state_check_rows), 0) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id_no_info, room_id_with_info, space_room_id), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_historical_state", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - # The memberships for user1 - (room_id_no_info, user1_id), - (room_id_with_info, user1_id), - (space_room_id, user1_id), - # The leave memberships for user2 - (room_id_no_info, user2_id), - (room_id_with_info, user2_id), - (space_room_id, user2_id), - }, - exact=True, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_no_info, - user_id=user1_id, - # Because user2 kicked/banned user1 from the room - sender=user2_id, - membership_event_id=user1_membership_room_id_no_info_response[ - "event_id" - ], - membership=test_membership, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_membership_room_id_no_info_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get( - (room_id_with_info, user1_id) - ), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id_with_info, - user_id=user1_id, - # Because user2 kicked/banned user1 from the room - sender=user2_id, - membership_event_id=user1_membership_room_id_with_info_response[ - "event_id" - ], - membership=test_membership, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_membership_room_id_with_info_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id="another_room", - ), - ) - 
self.assertEqual( - sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=space_room_id, - user_id=user1_id, - # Because user2 kicked/banned user1 from the room - sender=user2_id, - membership_event_id=user1_membership_space_room_id_response["event_id"], - membership=test_membership, - event_stream_ordering=self.get_success( - self.store.get_position_for_event( - user1_membership_space_room_id_response["event_id"] - ) - ).stream, - has_known_state=True, - room_type=RoomTypes.SPACE, - room_name="my super duper space", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_forgotten_missing(self) -> None: - """ - Test that a new row is inserted into `sliding_sync_membership_snapshots` when it - doesn't exist in the table yet. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id,), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Insert and run the background update. 
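The `forgotten` flag exercised here also has a long-standing read path, which can be handy when debugging this kind of test. For example (an extra illustrative check, not part of the patch):

    # After the /forget request above, the existing store API agrees that
    # user1 has forgotten the room, independently of the snapshot table.
    self.assertTrue(self.get_success(self.store.did_forget(user1_id, room_id)))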
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.LEAVE, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - # Room is forgotten - forgotten=True, - ), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - def test_membership_snapshots_background_update_forgotten_partial(self) -> None: - """ - Test an existing `sliding_sync_membership_snapshots` row is updated with the - latest `forgotten` status after the background update passes over it. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # Clean-up the `sliding_sync_joined_rooms` table as if the forgotten status - # never made it into the table. - self.get_success( - self.store.db_pool.simple_update( - table="sliding_sync_membership_snapshots", - keyvalues={"room_id": room_id}, - updatevalues={"forgotten": 0}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_partial", - ) - ) - - # We should see the partial row that we made in preparation for the test. 
- sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.LEAVE, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - # Room is *not* forgotten because of our test preparation - forgotten=False, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - # Insert and run the background update. - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Forgotten status is now updated - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - attr.evolve(user1_snapshot, forgotten=True), - ) - # Holds the info according to the current state when the user joined - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - def test_membership_snapshot_forget(self) -> None: - """ - Test forgetting a room will update `sliding_sync_membership_snapshots` - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - - # Check on the `sliding_sync_membership_snapshots` table (nothing should be - # forgotten yet) - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - 
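The expected rows in these forget tests are frozen attrs objects, so a single changed column is expressed with `attr.evolve` instead of rebuilding every field. For example (illustrative only, reusing the `user1_snapshot` defined in the surrounding test):

    # Derive the "forgotten" variant of an expected snapshot.
    expected_forgotten = attr.evolve(user1_snapshot, forgotten=True)
    assert expected_forgotten.forgotten is True
    # Every other field is carried over unchanged from the original snapshot.
    assert expected_forgotten.membership == user1_snapshot.membership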
user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.LEAVE, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - # Room is not forgotten - forgotten=False, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - # Holds the info according to the current state when the user joined - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # Check on the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Room is now forgotten for user1 - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - attr.evolve(user1_snapshot, forgotten=True), - ) - # Nothing changed for user2 - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - def test_membership_snapshot_missing_forget( - self, - ) -> None: - """ - Test forgetting a room with no existing row in `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id,), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. 
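The missing-row case here only passes if the storage side tolerates forgetting a room that has no snapshot row yet. A sketch of the shape that implies, assuming the forget path is a plain UPDATE keyed on room and user (an assumption about the implementation, not the patch's verbatim code):

    def _mark_snapshot_forgotten_txn(txn, room_id: str, user_id: str) -> None:
        # If no snapshot row exists, this UPDATE simply matches zero rows and
        # the forget request still succeeds.
        txn.execute(
            """
            UPDATE sliding_sync_membership_snapshots
            SET forgotten = 1
            WHERE room_id = ? AND user_id = ?
            """,
            (room_id, user_id),
        )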
- sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # It doesn't explode - - # We still shouldn't find anything in the table because nothing has re-created them - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py new file mode 100644 index 00000000000..34f42b6fd40 --- /dev/null +++ b/tests/storage/test_sliding_sync_tables.py @@ -0,0 +1,4159 @@ +# +# This file is licensed under the Affero General Public License (AGPL) version 3. +# +# Copyright (C) 2024 New Vector, Ltd +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# See the GNU Affero General Public License for more details: +# . +# +# Originally licensed under the Apache License, Version 2.0: +# . +# +# [This file includes modifications made by New Vector Limited] +# +# +import logging +from typing import Dict, List, Optional, Tuple, cast + +import attr +from parameterized import parameterized + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes +from synapse.api.room_versions import RoomVersions +from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict +from synapse.events.snapshot import EventContext +from synapse.rest import admin +from synapse.rest.client import login, room +from synapse.server import HomeServer +from synapse.storage.databases.main.events import DeltaState +from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates +from synapse.util import Clock + +from tests.test_utils.event_injection import create_event +from tests.unittest import HomeserverTestCase + +logger = logging.getLogger(__name__) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _SlidingSyncJoinedRoomResult: + room_id: str + # `event_stream_ordering` is only optional to allow easier semantics when we make + # expected objects from `event.internal_metadata.stream_ordering`. in the tests. + # `event.internal_metadata.stream_ordering` is marked optional because it only + # exists for persisted events but in the context of these tests, we're only working + # with persisted events and we're making comparisons so we will find any mismatch. + event_stream_ordering: Optional[int] + bump_stamp: Optional[int] + room_type: Optional[str] + room_name: Optional[str] + is_encrypted: bool + tombstone_successor_room_id: Optional[str] + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _SlidingSyncMembershipSnapshotResult: + room_id: str + user_id: str + sender: str + membership_event_id: str + membership: str + # `event_stream_ordering` is only optional to allow easier semantics when we make + # expected objects from `event.internal_metadata.stream_ordering`. in the tests. 
+ # `event.internal_metadata.stream_ordering` is marked optional because it only + # exists for persisted events but in the context of these tests, we're only working + # with persisted events and we're making comparisons so we will find any mismatch. + event_stream_ordering: Optional[int] + has_known_state: bool + room_type: Optional[str] + room_name: Optional[str] + is_encrypted: bool + tombstone_successor_room_id: Optional[str] + # Make this default to "not forgotten" because it doesn't apply to many tests and we + # don't want to force all of the tests to deal with it. + forgotten: bool = False + + +class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): + """ + Tests to make sure the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` database tables are + populated correctly. + """ + + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.storage_controllers = hs.get_storage_controllers() + persist_events_store = self.hs.get_datastores().persist_events + assert persist_events_store is not None + self.persist_events_store = persist_events_store + + def _get_sliding_sync_joined_rooms(self) -> Dict[str, _SlidingSyncJoinedRoomResult]: + """ + Return the rows from the `sliding_sync_joined_rooms` table. + + Returns: + Mapping from room_id to _SlidingSyncJoinedRoomResult. + """ + rows = cast( + List[Tuple[str, int, int, str, str, bool, str]], + self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_joined_rooms", + None, + retcols=( + "room_id", + "event_stream_ordering", + "bump_stamp", + "room_type", + "room_name", + "is_encrypted", + "tombstone_successor_room_id", + ), + ), + ), + ) + + return { + row[0]: _SlidingSyncJoinedRoomResult( + room_id=row[0], + event_stream_ordering=row[1], + bump_stamp=row[2], + room_type=row[3], + room_name=row[4], + is_encrypted=bool(row[5]), + tombstone_successor_room_id=row[6], + ) + for row in rows + } + + def _get_sliding_sync_membership_snapshots( + self, + ) -> Dict[Tuple[str, str], _SlidingSyncMembershipSnapshotResult]: + """ + Return the rows from the `sliding_sync_membership_snapshots` table. + + Returns: + Mapping from the (room_id, user_id) to _SlidingSyncMembershipSnapshotResult. + """ + rows = cast( + List[Tuple[str, str, str, str, str, int, int, bool, str, str, bool, str]], + self.get_success( + self.store.db_pool.simple_select_list( + "sliding_sync_membership_snapshots", + None, + retcols=( + "room_id", + "user_id", + "sender", + "membership_event_id", + "membership", + "forgotten", + "event_stream_ordering", + "has_known_state", + "room_type", + "room_name", + "is_encrypted", + "tombstone_successor_room_id", + ), + ), + ), + ) + + return { + (row[0], row[1]): _SlidingSyncMembershipSnapshotResult( + room_id=row[0], + user_id=row[1], + sender=row[2], + membership_event_id=row[3], + membership=row[4], + forgotten=bool(row[5]), + event_stream_ordering=row[6], + has_known_state=bool(row[7]), + room_type=row[8], + room_name=row[9], + is_encrypted=bool(row[10]), + tombstone_successor_room_id=row[11], + ) + for row in rows + } + + _remote_invite_count: int = 0 + + def _create_remote_invite_room_for_user( + self, + invitee_user_id: str, + unsigned_invite_room_state: Optional[List[StrippedStateEvent]], + ) -> Tuple[str, EventBase]: + """ + Create a fake invite for a remote room and persist it. 
+ + We don't have any state for these kind of rooms and can only rely on the + stripped state included in the unsigned portion of the invite event to identify + the room. + + Args: + invitee_user_id: The person being invited + unsigned_invite_room_state: List of stripped state events to assist the + receiver in identifying the room. + + Returns: + The room ID of the remote invite room and the persisted remote invite event. + """ + invite_room_id = f"!test_room{self._remote_invite_count}:remote_server" + + invite_event_dict = { + "room_id": invite_room_id, + "sender": "@inviter:remote_server", + "state_key": invitee_user_id, + "depth": 1, + "origin_server_ts": 1, + "type": EventTypes.Member, + "content": {"membership": Membership.INVITE}, + "auth_events": [], + "prev_events": [], + } + if unsigned_invite_room_state is not None: + serialized_stripped_state_events = [] + for stripped_event in unsigned_invite_room_state: + serialized_stripped_state_events.append( + { + "type": stripped_event.type, + "state_key": stripped_event.state_key, + "sender": stripped_event.sender, + "content": stripped_event.content, + } + ) + + invite_event_dict["unsigned"] = { + "invite_room_state": serialized_stripped_state_events + } + + invite_event = make_event_from_dict( + invite_event_dict, + room_version=RoomVersions.V10, + ) + invite_event.internal_metadata.outlier = True + invite_event.internal_metadata.out_of_band_membership = True + + self.get_success( + self.store.maybe_store_room_on_outlier_membership( + room_id=invite_room_id, room_version=invite_event.room_version + ) + ) + context = EventContext.for_outlier(self.hs.get_storage_controllers()) + persist_controller = self.hs.get_storage_controllers().persistence + assert persist_controller is not None + persisted_event, _, _ = self.get_success( + persist_controller.persist_event(invite_event, context) + ) + + self._remote_invite_count += 1 + + return invite_room_id, persisted_event + + def _retract_remote_invite_for_user( + self, + user_id: str, + remote_room_id: str, + ) -> EventBase: + """ + Create a fake invite retraction for a remote room and persist it. + + Retracting an invite just means the person is no longer invited to the room. + This is done by someone with proper power levels kicking the user from the room. + A kick shows up as a leave event for a given person with a different `sender`. + + Args: + user_id: The person who was invited and we're going to retract the + invite for. + remote_room_id: The room ID that the invite was for. + + Returns: + The persisted leave (kick) event. 
+ """ + + kick_event_dict = { + "room_id": remote_room_id, + "sender": "@inviter:remote_server", + "state_key": user_id, + "depth": 1, + "origin_server_ts": 1, + "type": EventTypes.Member, + "content": {"membership": Membership.LEAVE}, + "auth_events": [], + "prev_events": [], + } + + kick_event = make_event_from_dict( + kick_event_dict, + room_version=RoomVersions.V10, + ) + kick_event.internal_metadata.outlier = True + kick_event.internal_metadata.out_of_band_membership = True + + self.get_success( + self.store.maybe_store_room_on_outlier_membership( + room_id=remote_room_id, room_version=kick_event.room_version + ) + ) + context = EventContext.for_outlier(self.hs.get_storage_controllers()) + persist_controller = self.hs.get_storage_controllers().persistence + assert persist_controller is not None + persisted_event, _, _ = self.get_success( + persist_controller.persist_event(kick_event, context) + ) + + return persisted_event + + def test_joined_room_with_no_info(self) -> None: + """ + Test joined room that doesn't have a room type, encryption, or name shows up in + `sliding_sync_joined_rooms`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # History visibility just happens to be the last event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomHistoryVisibility, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_room_with_info(self) -> None: + """ + Test joined encrypted room with name shows up in `sliding_sync_joined_rooms`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id1, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + # Add a tombstone + self.helper.send_state( + room_id1, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + # Even though this room does have a name, is encrypted, and has a + # tombstone, user2 is the room creator and joined at the room creation + # time which didn't have this state set yet. + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_space_room_with_info(self) -> None: + """ + Test joined space room with name shows up in `sliding_sync_joined_rooms`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # User1 joins the room + user1_join_response = self.helper.join(space_room_id, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {space_room_id}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (space_room_id, user1_id), + (space_room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + # Even though this room does have a name, user2 is the room creator and + # joined at the room creation time which didn't have this state set yet. + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_room_with_state_updated(self) -> None: + """ + Test state derived info in `sliding_sync_joined_rooms` is updated when the + current state is updated. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + + # Update the room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room was renamed"}, + tok=user2_tok, + ) + # Encrypt the room + encrypt_room_response = self.helper.send_state( + room_id1, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + encrypt_room_event_pos = self.get_success( + self.store.get_position_for_event(encrypt_room_response["event_id"]) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + # Make sure we see the new room name + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + event_stream_ordering=encrypt_room_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room was renamed", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the user 
joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_room_is_bumped(self) -> None: + """ + Test that `event_stream_ordering` and `bump_stamp` is updated when a new bump + event is sent (`sliding_sync_joined_rooms`). + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_join_event_pos = self.get_success( + self.store.get_position_for_event(user1_join_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + event_stream_ordering=user1_join_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + # Send a new message to 
bump the room
+ event_response = self.helper.send(room_id1, "some message", tok=user1_tok)
+ event_pos = self.get_success(
+ self.store.get_position_for_event(event_response["event_id"])
+ )
+
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+ self.assertIncludes(
+ set(sliding_sync_joined_rooms_results.keys()),
+ {room_id1},
+ exact=True,
+ )
+ # Make sure the `event_stream_ordering` and `bump_stamp` are updated
+ self.assertEqual(
+ sliding_sync_joined_rooms_results[room_id1],
+ _SlidingSyncJoinedRoomResult(
+ room_id=room_id1,
+ # Updated `event_stream_ordering`
+ event_stream_ordering=event_pos.stream,
+ # And since the event was a bump event, the `bump_stamp` should be updated
+ bump_stamp=event_pos.stream,
+ # The state is still the same (it didn't change)
+ room_type=None,
+ room_name="my super duper room",
+ is_encrypted=False,
+ tombstone_successor_room_id=None,
+ ),
+ )
+
+ sliding_sync_membership_snapshots_results = (
+ self._get_sliding_sync_membership_snapshots()
+ )
+ self.assertIncludes(
+ set(sliding_sync_membership_snapshots_results.keys()),
+ {
+ (room_id1, user1_id),
+ (room_id1, user2_id),
+ },
+ exact=True,
+ )
+ self.assertEqual(
+ sliding_sync_membership_snapshots_results.get((room_id1, user1_id)),
+ user1_snapshot,
+ )
+ self.assertEqual(
+ sliding_sync_membership_snapshots_results.get((room_id1, user2_id)),
+ user2_snapshot,
+ )
+
+ def test_joined_room_meta_state_reset(self) -> None:
+ """
+ Test that a state reset on the room name is reflected in the
+ `sliding_sync_joined_rooms` table.
+ """
+ user1_id = self.register_user("user1", "pass")
+ user1_tok = self.login(user1_id, "pass")
+ user2_id = self.register_user("user2", "pass")
+ user2_tok = self.login(user2_id, "pass")
+
+ room_id = self.helper.create_room_as(user2_id, tok=user2_tok)
+ # Add a room name
+ self.helper.send_state(
+ room_id,
+ EventTypes.Name,
+ {"name": "my super duper room"},
+ tok=user2_tok,
+ )
+
+ # User1 joins the room
+ self.helper.join(room_id, user1_id, tok=user1_tok)
+
+ # Make sure we see the new room name
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+ self.assertIncludes(
+ set(sliding_sync_joined_rooms_results.keys()),
+ {room_id},
+ exact=True,
+ )
+ state_map = self.get_success(
+ self.storage_controllers.state.get_current_state(room_id)
+ )
+ self.assertEqual(
+ sliding_sync_joined_rooms_results[room_id],
+ _SlidingSyncJoinedRoomResult(
+ room_id=room_id,
+ # This should be whatever is the last event in the room
+ event_stream_ordering=state_map[
+ (EventTypes.Member, user1_id)
+ ].internal_metadata.stream_ordering,
+ bump_stamp=state_map[
+ (EventTypes.Create, "")
+ ].internal_metadata.stream_ordering,
+ room_type=None,
+ room_name="my super duper room",
+ is_encrypted=False,
+ tombstone_successor_room_id=None,
+ ),
+ )
+
+ sliding_sync_membership_snapshots_results = (
+ self._get_sliding_sync_membership_snapshots()
+ )
+ self.assertIncludes(
+ set(sliding_sync_membership_snapshots_results.keys()),
+ {
+ (room_id, user1_id),
+ (room_id, user2_id),
+ },
+ exact=True,
+ )
+ user1_snapshot = _SlidingSyncMembershipSnapshotResult(
+ room_id=room_id,
+ user_id=user1_id,
+ sender=user1_id,
+ membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id,
+ membership=Membership.JOIN,
+ event_stream_ordering=state_map[
+ (EventTypes.Member, user1_id)
+ ].internal_metadata.stream_ordering,
+ has_known_state=True,
+ room_type=None,
+ room_name="my super duper room",
+ is_encrypted=False,
+ tombstone_successor_room_id=None,
+ )
+ self.assertEqual(
+
sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined (no room + # name when the room creator joined) + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Mock a state reset removing the room name state from the current state + message_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[state_map[(EventTypes.Name, "")].event_id], + auth_event_ids=[ + state_map[(EventTypes.Create, "")].event_id, + state_map[(EventTypes.Member, user1_id)].event_id, + ], + type=EventTypes.Message, + content={"body": "foo", "msgtype": "m.text"}, + sender=user1_id, + room_id=room_id, + room_version=RoomVersions.V10.identifier, + ) + ) + event_chunk = [message_tuple] + self.get_success( + self.persist_events_store._persist_events_and_state_updates( + room_id, + event_chunk, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. + to_delete=[(EventTypes.Name, "")], + to_insert={}, + ), + new_forward_extremities={message_tuple[0].event_id}, + use_negative_stream_ordering=False, + inhibit_local_membership_updates=False, + new_event_links={}, + ) + ) + + # Make sure the state reset is reflected in the `sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=message_tuple[ + 0 + ].internal_metadata.stream_ordering, + bump_stamp=message_tuple[0].internal_metadata.stream_ordering, + room_type=None, + # This was state reset back to None + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + # State reset shouldn't be reflected in the `sliding_sync_membership_snapshots` + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Snapshots haven't changed + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_non_join_space_room_with_info(self) -> None: + """ + Test users who was invited shows up in `sliding_sync_membership_snapshots`. 
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + space_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + # Add a tombstone + self.helper.send_state( + space_room_id, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) + + # User1 is invited to the room + user1_invited_response = self.helper.invite( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # Update the room name after we are invited just to make sure + # we don't update non-join memberships when the room name changes. + rename_response = self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space was renamed"}, + tok=user2_tok, + ) + rename_event_pos = self.get_success( + self.store.get_position_for_event(rename_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + + # User2 is still joined to the room so we should still have an entry in the + # `sliding_sync_joined_rooms` table. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {space_room_id}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + event_stream_ordering=rename_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space was renamed", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (space_room_id, user1_id), + (space_room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was invited + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + 
membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_non_join_invite_ban(self) -> None: + """ + Test users who have invite/ban membership in room shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + user3_tok = self.login(user3_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + user1_invited_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # User3 joins the room + self.helper.join(room_id1, user3_id, tok=user3_tok) + # User3 is banned from the room + user3_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user3_id, tok=user2_tok + ) + user3_ban_event_pos = self.get_success( + self.store.get_position_for_event(user3_ban_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # User2 is still joined to the room so we should still have an entry + # in the `sliding_sync_joined_rooms` table. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + event_stream_ordering=user3_ban_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + (room_id1, user3_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was invited + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # 
Holds the info according to the current state when the user was banned + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user3_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user3_id, + sender=user2_id, + membership_event_id=user3_ban_response["event_id"], + membership=Membership.BAN, + event_stream_ordering=user3_ban_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_non_join_reject_invite_empty_room(self) -> None: + """ + In a room where no one is joined (`no_longer_in_room`), test rejecting an invite. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + # User2 leaves the room + user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_event_pos = self.get_success( + self.store.get_position_for_event(user2_leave_response["event_id"]) + ) + + # User1 rejects the invite + user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_event_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one is joined to the room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user left + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the left + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_changing(self) -> None: + """ + Test latest snapshot evolves when membership changes (`sliding_sync_membership_snapshots`). 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to the room + # ====================================================== + user1_invited_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invited_event_pos = self.get_success( + self.store.get_position_for_event(user1_invited_response["event_id"]) + ) + + # Update the room name after the user was invited + room_name_update_response = self.helper.send_state( + room_id1, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + room_name_update_event_pos = self.get_success( + self.store.get_position_for_event(room_name_update_response["event_id"]) + ) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=room_name_update_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was invited + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invited_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=user1_invited_event_pos.stream, + has_known_state=True, + room_type=None, + # Room name was updated after the user was invited so we should still + # see it unset here + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + # User1 joins the room + # ====================================================== + user1_joined_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + user1_joined_event_pos = self.get_success( + self.store.get_position_for_event(user1_joined_response["event_id"]) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = 
self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=user1_joined_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=user1_joined_response["event_id"], + membership=Membership.JOIN, + event_stream_ordering=user1_joined_event_pos.stream, + has_known_state=True, + room_type=None, + # We see the update state because the user joined after the room name + # change + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + # User1 is banned from the room + # ====================================================== + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_ban_event_pos = self.get_success( + self.store.get_position_for_event(user1_ban_response["event_id"]) + ) + + # Assert joined room status + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id1}, + exact=True, + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id1], + _SlidingSyncJoinedRoomResult( + room_id=room_id1, + # Latest event in the room + event_stream_ordering=user1_ban_event_pos.stream, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + # Assert membership snapshots + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user was banned + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_ban_response["event_id"], + membership=Membership.BAN, + event_stream_ordering=user1_ban_event_pos.stream, + has_known_state=True, + room_type=None, + # We see the update state because the user joined after the room name + # change + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + # Holds the info according to the current state 
when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + user2_snapshot, + ) + + def test_non_join_server_left_room(self) -> None: + """ + Test everyone local leaves the room but their leave membership still shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # User2 leaves the room + user2_leave_response = self.helper.leave(room_id1, user2_id, tok=user2_tok) + user2_leave_event_pos = self.get_success( + self.store.get_position_for_event(user2_leave_response["event_id"]) + ) + + # User1 leaves the room + user1_leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) + user1_leave_event_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one is joined to the room anymore so we shouldn't have an entry in the + # `sliding_sync_joined_rooms` table. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # We should still see rows for the leave events (non-joins) + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id1, user1_id), + (room_id1, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user1_id, + sender=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id1, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id1, + user_id=user2_id, + sender=user2_id, + membership_event_id=user2_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user2_leave_event_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + @parameterized.expand( + [ + # No stripped state provided + ("none", None), + # Empty stripped state provided + ("empty", []), + ] + ) + def test_non_join_remote_invite_no_stripped_state( + self, _description: str, stripped_state: Optional[List[StrippedStateEvent]] + ) -> None: + """ + Test remote invite with no stripped state provided shows up in + `sliding_sync_membership_snapshots` with `has_known_state=False`. 
+ """
+ user1_id = self.register_user("user1", "pass")
+ _user1_tok = self.login(user1_id, "pass")
+
+ # Create a remote invite room without any `unsigned.invite_room_state`
+ remote_invite_room_id, remote_invite_event = (
+ self._create_remote_invite_room_for_user(user1_id, stripped_state)
+ )
+
+ # No one local is joined to the remote room
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+ self.assertIncludes(
+ set(sliding_sync_joined_rooms_results.keys()),
+ set(),
+ exact=True,
+ )
+
+ sliding_sync_membership_snapshots_results = (
+ self._get_sliding_sync_membership_snapshots()
+ )
+ self.assertIncludes(
+ set(sliding_sync_membership_snapshots_results.keys()),
+ {
+ (remote_invite_room_id, user1_id),
+ },
+ exact=True,
+ )
+ self.assertEqual(
+ sliding_sync_membership_snapshots_results.get(
+ (remote_invite_room_id, user1_id)
+ ),
+ _SlidingSyncMembershipSnapshotResult(
+ room_id=remote_invite_room_id,
+ user_id=user1_id,
+ sender="@inviter:remote_server",
+ membership_event_id=remote_invite_event.event_id,
+ membership=Membership.INVITE,
+ event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering,
+ # No stripped state provided
+ has_known_state=False,
+ room_type=None,
+ room_name=None,
+ is_encrypted=False,
+ tombstone_successor_room_id=None,
+ ),
+ )
+
+ def test_non_join_remote_invite_unencrypted_room(self) -> None:
+ """
+ Test remote invite with stripped state (unencrypted room) shows up in
+ `sliding_sync_membership_snapshots`.
+ """
+ user1_id = self.register_user("user1", "pass")
+ _user1_tok = self.login(user1_id, "pass")
+
+ # Create a remote invite room with some `unsigned.invite_room_state`
+ # indicating the room name (the room is unencrypted).
+ remote_invite_room_id, remote_invite_event = (
+ self._create_remote_invite_room_for_user(
+ user1_id,
+ [
+ StrippedStateEvent(
+ type=EventTypes.Create,
+ state_key="",
+ sender="@inviter:remote_server",
+ content={
+ EventContentFields.ROOM_CREATOR: "@inviter:remote_server",
+ EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier,
+ },
+ ),
+ StrippedStateEvent(
+ type=EventTypes.Name,
+ state_key="",
+ sender="@inviter:remote_server",
+ content={
+ EventContentFields.ROOM_NAME: "my super duper room",
+ },
+ ),
+ ],
+ )
+ )
+
+ # No one local is joined to the remote room
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+ self.assertIncludes(
+ set(sliding_sync_joined_rooms_results.keys()),
+ set(),
+ exact=True,
+ )
+
+ sliding_sync_membership_snapshots_results = (
+ self._get_sliding_sync_membership_snapshots()
+ )
+ self.assertIncludes(
+ set(sliding_sync_membership_snapshots_results.keys()),
+ {
+ (remote_invite_room_id, user1_id),
+ },
+ exact=True,
+ )
+ self.assertEqual(
+ sliding_sync_membership_snapshots_results.get(
+ (remote_invite_room_id, user1_id)
+ ),
+ _SlidingSyncMembershipSnapshotResult(
+ room_id=remote_invite_room_id,
+ user_id=user1_id,
+ sender="@inviter:remote_server",
+ membership_event_id=remote_invite_event.event_id,
+ membership=Membership.INVITE,
+ event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering,
+ has_known_state=True,
+ room_type=None,
+ room_name="my super duper room",
+ is_encrypted=False,
+ tombstone_successor_room_id=None,
+ ),
+ )
+
+ def test_non_join_remote_invite_encrypted_room(self) -> None:
+ """
+ Test remote invite with stripped state (encrypted room) shows up in
+ `sliding_sync_membership_snapshots`.
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + # This is not one of the stripped state events according to the state + # but we still handle it. + StrippedStateEvent( + type=EventTypes.Tombstone, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room", + }, + ), + # Also test a random event that we don't care about + StrippedStateEvent( + type="org.matrix.foo_state", + state_key="", + sender="@inviter:remote_server", + content={ + "foo": "qux", + }, + ), + ], + ) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + + def test_non_join_remote_invite_space_room(self) -> None: + """ + Test remote invite with stripped state (encrypted space room with name) shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. 
+ remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + # Specify that it is a space room + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=remote_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=remote_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + + def test_non_join_reject_remote_invite(self) -> None: + """ + Test rejected remote invite (user decided to leave the room) inherits meta data + from when the remote invite stripped state and shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. 
+ remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + # User1 decides to leave the room (reject the invite) + user1_leave_response = self.helper.leave( + remote_invite_room_id, user1_id, tok=user1_tok + ) + user1_leave_pos = self.get_success( + self.store.get_position_for_event(user1_leave_response["event_id"]) + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=user1_leave_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=user1_leave_pos.stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + + def test_non_join_retracted_remote_invite(self) -> None: + """ + Test retracted remote invite (Remote inviter kicks the person who was invited) + inherits meta data from when the remote invite stripped state and shows up in + `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. + remote_invite_room_id, remote_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + # `@inviter:remote_server` decides to retract the invite (kicks the user). 
+ # (Note: A kick is just a leave event with a different sender) + remote_invite_retraction_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=remote_invite_room_id, + ) + + # No one local is joined to the remote room + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (remote_invite_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (remote_invite_room_id, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=remote_invite_room_id, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=remote_invite_retraction_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=remote_invite_retraction_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + + def test_non_join_state_reset(self) -> None: + """ + Test a state reset that removes someone from the room. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Make sure we see the new room name + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined (no room + # name when the room creator joined) 
+ user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Mock a state reset removing the membership for user1 in the current state + message_tuple = self.get_success( + create_event( + self.hs, + prev_event_ids=[state_map[(EventTypes.Name, "")].event_id], + auth_event_ids=[ + state_map[(EventTypes.Create, "")].event_id, + state_map[(EventTypes.Member, user1_id)].event_id, + ], + type=EventTypes.Message, + content={"body": "foo", "msgtype": "m.text"}, + sender=user1_id, + room_id=room_id, + room_version=RoomVersions.V10.identifier, + ) + ) + event_chunk = [message_tuple] + self.get_success( + self.persist_events_store._persist_events_and_state_updates( + room_id, + event_chunk, + state_delta_for_room=DeltaState( + # This is the state reset part. We're removing the room name state. + to_delete=[(EventTypes.Member, user1_id)], + to_insert={}, + ), + new_forward_extremities={message_tuple[0].event_id}, + use_negative_stream_ordering=False, + inhibit_local_membership_updates=False, + new_event_links={}, + ) + ) + + # State reset on membership doesn't affect the`sliding_sync_joined_rooms` table + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=message_tuple[ + 0 + ].internal_metadata.stream_ordering, + bump_stamp=message_tuple[0].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + # State reset on membership should remove the user's snapshot + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # We shouldn't see user1 in the snapshots table anymore + (room_id, user2_id), + }, + exact=True, + ) + # Snapshot for user2 hasn't changed + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_joined_background_update_missing(self) -> None: + """ + Test that the background update for `sliding_sync_joined_rooms` populates missing rows + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok) + + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + 
EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_joined_rooms` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_joined_rooms", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_joined_rooms.test_joined_background_update_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id_no_info, room_id_with_info, space_room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_no_info) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_no_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_no_info, + # History visibility just happens to be the last event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomHistoryVisibility, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_with_info) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id_with_info], + _SlidingSyncJoinedRoomResult( + room_id=room_id_with_info, + # Lastest event sent in the room + event_stream_ordering=state_map[ + (EventTypes.RoomEncryption, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[space_room_id], + _SlidingSyncJoinedRoomResult( + room_id=space_room_id, + # Lastest event sent in the room + event_stream_ordering=state_map[ + (EventTypes.Name, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_background_update_partial(self) -> None: + """ + 
Test that the background update for `sliding_sync_joined_rooms` populates
+        partially updated rows.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+
+        # Create rooms with various levels of state that should appear in the table
+        #
+        room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok)
+        # Add a room name
+        self.helper.send_state(
+            room_id_with_info,
+            EventTypes.Name,
+            {"name": "my super duper room"},
+            tok=user1_tok,
+        )
+        # Encrypt the room
+        self.helper.send_state(
+            room_id_with_info,
+            EventTypes.RoomEncryption,
+            {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"},
+            tok=user1_tok,
+        )
+
+        state_map = self.get_success(
+            self.storage_controllers.state.get_current_state(room_id_with_info)
+        )
+
+        # Clean-up the `sliding_sync_joined_rooms` table as if the encryption event
+        # never made it into the table.
+        self.get_success(
+            self.store.db_pool.simple_update(
+                table="sliding_sync_joined_rooms",
+                keyvalues={"room_id": room_id_with_info},
+                updatevalues={"is_encrypted": False},
+                desc="sliding_sync_joined_rooms.test_joined_background_update_partial",
+            )
+        )
+
+        # We should see the partial row that we made in preparation for the test.
+        sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+        self.assertIncludes(
+            set(sliding_sync_joined_rooms_results.keys()),
+            {room_id_with_info},
+            exact=True,
+        )
+        self.assertEqual(
+            sliding_sync_joined_rooms_results[room_id_with_info],
+            _SlidingSyncJoinedRoomResult(
+                room_id=room_id_with_info,
+                # Latest event sent in the room
+                event_stream_ordering=state_map[
+                    (EventTypes.RoomEncryption, "")
+                ].internal_metadata.stream_ordering,
+                bump_stamp=state_map[
+                    (EventTypes.Create, "")
+                ].internal_metadata.stream_ordering,
+                room_type=None,
+                room_name="my super duper room",
+                is_encrypted=False,
+                tombstone_successor_room_id=None,
+            ),
+        )
+
+        # Insert and run the background update.
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
+                    "progress_json": "{}",
+                },
+            )
+        )
+        self.store.db_pool.updates._all_done = False
+        self.wait_for_background_updates()
+
+        # Make sure the table is populated
+        sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms()
+        self.assertIncludes(
+            set(sliding_sync_joined_rooms_results.keys()),
+            {room_id_with_info},
+            exact=True,
+        )
+        self.assertEqual(
+            sliding_sync_joined_rooms_results[room_id_with_info],
+            _SlidingSyncJoinedRoomResult(
+                room_id=room_id_with_info,
+                # Latest event sent in the room
+                event_stream_ordering=state_map[
+                    (EventTypes.RoomEncryption, "")
+                ].internal_metadata.stream_ordering,
+                bump_stamp=state_map[
+                    (EventTypes.Create, "")
+                ].internal_metadata.stream_ordering,
+                room_type=None,
+                room_name="my super duper room",
+                is_encrypted=True,
+                tombstone_successor_room_id=None,
+            ),
+        )
+
+    def test_membership_snapshots_background_update_joined(self) -> None:
+        """
+        Test that the background update for `sliding_sync_membership_snapshots`
+        populates missing rows for join memberships.
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user1_id, tok=user1_tok) + + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user1_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_joined", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_no_info) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id_with_info) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(space_room_id) + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_local_invite(self) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + populates missing rows for invite memberships. 
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) + + room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # Invite user1 to the rooms + user1_invite_room_id_no_info_response = self.helper.invite( + room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invite_room_id_with_info_response = self.helper.invite( + room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_invite_space_room_id_response = self.helper.invite( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + + # Have user2 leave the rooms to make sure that our background update is not just + # reading from `current_state_events`. For invite/knock memberships, we should + # be reading from the stripped state on the invite/knock event itself. + self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) + self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) + self.helper.leave(space_room_id, user2_id, tok=user2_tok) + # Check to make sure we actually don't have any `current_state_events` for the rooms + current_state_check_rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="current_state_events", + column="room_id", + iterable=[room_id_no_info, room_id_with_info, space_room_id], + retcols=("event_id",), + keyvalues={}, + desc="check current_state_events in test", + ) + ) + self.assertEqual(len(current_state_check_rows), 0) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_local_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + # The leave memberships for user2 + (room_id_no_info, user2_id), + (room_id_with_info, user2_id), + (space_room_id, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invite_room_id_no_info_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_room_id_no_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invite_room_id_with_info_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_room_id_with_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + # The tombstone isn't showing here ("another_room") because it's not one + # of the stripped events that we hand out as part of the invite event. + # Even though we handle this scenario from other remote homservers, + # Synapse does not include the tombstone in the invite event. + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender=user2_id, + membership_event_id=user1_invite_space_room_id_response["event_id"], + membership=Membership.INVITE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_invite_space_room_id_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_remote_invite( + self, + ) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + populates missing rows for remote invites (out-of-band memberships). 
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_unknown_state, room_id_unknown_state_invite_event = ( + self._create_remote_invite_room_for_user(user1_id, None) + ) + + room_id_no_info, room_id_no_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + ], + ) + ) + + room_id_with_info, room_id_with_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper room", + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + space_room_id, space_room_id_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=( + room_id_unknown_state, + room_id_no_info, + room_id_with_info, + space_room_id, + ), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_unknown_state, user1_id), + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_unknown_state, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_unknown_state, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=room_id_unknown_state_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_unknown_state_invite_event.internal_metadata.stream_ordering, + has_known_state=False, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=room_id_no_info_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_no_info_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=room_id_with_info_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=room_id_with_info_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=space_room_id_invite_event.event_id, + membership=Membership.INVITE, + event_stream_ordering=space_room_id_invite_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_remote_invite_rejections_and_retractions( + self, + ) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + populates missing rows for remote invite rejections/retractions (out-of-band memberships). 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_unknown_state, room_id_unknown_state_invite_event = ( + self._create_remote_invite_room_for_user(user1_id, None) + ) + + room_id_no_info, room_id_no_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + ], + ) + ) + + room_id_with_info, room_id_with_info_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper room", + }, + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + ), + ], + ) + ) + + space_room_id, space_room_id_invite_event = ( + self._create_remote_invite_room_for_user( + user1_id, + [ + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + ), + StrippedStateEvent( + type=EventTypes.Name, + state_key="", + sender="@inviter:remote_server", + content={ + EventContentFields.ROOM_NAME: "my super duper space", + }, + ), + ], + ) + ) + + # Reject the remote invites. + # Also try retracting a remote invite. + room_id_unknown_state_leave_event_response = self.helper.leave( + room_id_unknown_state, user1_id, tok=user1_tok + ) + room_id_no_info_leave_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=room_id_no_info, + ) + room_id_with_info_leave_event_response = self.helper.leave( + room_id_with_info, user1_id, tok=user1_tok + ) + space_room_id_leave_event = self._retract_remote_invite_for_user( + user_id=user1_id, + remote_room_id=space_room_id, + ) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=( + room_id_unknown_state, + room_id_no_info, + room_id_with_info, + space_room_id, + ), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_remote_invite_rejections_and_retractions", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The invite memberships for user1 + (room_id_unknown_state, user1_id), + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_unknown_state, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_unknown_state, + user_id=user1_id, + sender=user1_id, + membership_event_id=room_id_unknown_state_leave_event_response[ + "event_id" + ], + membership=Membership.LEAVE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + room_id_unknown_state_leave_event_response["event_id"] + ) + ).stream, + has_known_state=False, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=room_id_no_info_leave_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=room_id_no_info_leave_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + sender=user1_id, + membership_event_id=room_id_with_info_leave_event_response["event_id"], + membership=Membership.LEAVE, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + room_id_with_info_leave_event_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + sender="@inviter:remote_server", + membership_event_id=space_room_id_leave_event.event_id, + membership=Membership.LEAVE, + event_stream_ordering=space_room_id_leave_event.internal_metadata.stream_ordering, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + @parameterized.expand( + [ + # We'll do a kick for this + (Membership.LEAVE,), + (Membership.BAN,), + ] + ) + def test_membership_snapshots_background_update_historical_state( + self, test_membership: str + ) -> None: + """ + Test that the background update for `sliding_sync_membership_snapshots` + populates missing rows for leave memberships. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create rooms with various levels of state that should appear in the table + # + room_id_no_info = self.helper.create_room_as(user2_id, tok=user2_tok) + + room_id_with_info = self.helper.create_room_as(user2_id, tok=user2_tok) + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user2_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id_with_info, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + # Add a tombstone + self.helper.send_state( + room_id_with_info, + EventTypes.Tombstone, + {EventContentFields.TOMBSTONE_SUCCESSOR_ROOM: "another_room"}, + tok=user2_tok, + ) + + space_room_id = self.helper.create_room_as( + user1_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Add a room name + self.helper.send_state( + space_room_id, + EventTypes.Name, + {"name": "my super duper space"}, + tok=user2_tok, + ) + + # Join the room in preparation for our test_membership + self.helper.join(room_id_no_info, user1_id, tok=user1_tok) + self.helper.join(room_id_with_info, user1_id, tok=user1_tok) + self.helper.join(space_room_id, user1_id, tok=user1_tok) + + if test_membership == Membership.LEAVE: + # Kick user1 from the rooms + user1_membership_room_id_no_info_response = self.helper.change_membership( + room=room_id_no_info, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, + ) + user1_membership_room_id_with_info_response = self.helper.change_membership( + room=room_id_with_info, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, + ) + user1_membership_space_room_id_response = self.helper.change_membership( + room=space_room_id, + src=user2_id, + targ=user1_id, + tok=user2_tok, + membership=Membership.LEAVE, + extra_data={ + "reason": "Bad manners", + }, + ) + elif test_membership == Membership.BAN: + # Ban user1 from the rooms + user1_membership_room_id_no_info_response = self.helper.ban( + room_id_no_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_membership_room_id_with_info_response = self.helper.ban( + room_id_with_info, src=user2_id, targ=user1_id, tok=user2_tok + ) + user1_membership_space_room_id_response = self.helper.ban( + space_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) + else: + raise AssertionError("Unknown test_membership") + + # Have user2 leave the rooms to make sure that our background update is not just + # reading from `current_state_events`. For leave memberships, we should be + # reading from the historical state. 
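(Editorial sketch, not part of the patch: "historical state" here means the room state as it was at user1's kick/ban event. Assuming the state storage controller exposes a `get_state_ids_for_event` helper, a rough cross-check would look like the snippet below; the snapshot row should agree with this state map rather than with the now-empty `current_state_events`.)

# Hypothetical cross-check only; the `get_state_ids_for_event` call is an
# assumption for illustration and is not part of the actual test.
state_at_membership_event = self.get_success(
    self.storage_controllers.state.get_state_ids_for_event(
        user1_membership_room_id_no_info_response["event_id"]
    )
)
self.assertIn((EventTypes.Create, ""), state_at_membership_event)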
+ self.helper.leave(room_id_no_info, user2_id, tok=user2_tok) + self.helper.leave(room_id_with_info, user2_id, tok=user2_tok) + self.helper.leave(space_room_id, user2_id, tok=user2_tok) + # Check to make sure we actually don't have any `current_state_events` for the rooms + current_state_check_rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="current_state_events", + column="room_id", + iterable=[room_id_no_info, room_id_with_info, space_room_id], + retcols=("event_id",), + keyvalues={}, + desc="check current_state_events in test", + ) + ) + self.assertEqual(len(current_state_check_rows), 0) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id_no_info, room_id_with_info, space_room_id), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_historical_state", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + # The memberships for user1 + (room_id_no_info, user1_id), + (room_id_with_info, user1_id), + (space_room_id, user1_id), + # The leave memberships for user2 + (room_id_no_info, user2_id), + (room_id_with_info, user2_id), + (space_room_id, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id_no_info, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_no_info, + user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, + membership_event_id=user1_membership_room_id_no_info_response[ + "event_id" + ], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_room_id_no_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get( + (room_id_with_info, user1_id) + ), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id_with_info, + user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, + membership_event_id=user1_membership_room_id_with_info_response[ + "event_id" + ], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_room_id_with_info_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id="another_room", + ), + ) + 
self.assertEqual( + sliding_sync_membership_snapshots_results.get((space_room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=space_room_id, + user_id=user1_id, + # Because user2 kicked/banned user1 from the room + sender=user2_id, + membership_event_id=user1_membership_space_room_id_response["event_id"], + membership=test_membership, + event_stream_ordering=self.get_success( + self.store.get_position_for_event( + user1_membership_space_room_id_response["event_id"] + ) + ).stream, + has_known_state=True, + room_type=RoomTypes.SPACE, + room_name="my super duper space", + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_forgotten_missing(self) -> None: + """ + Test that a new row is inserted into `sliding_sync_membership_snapshots` when it + doesn't exist in the table yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Insert and run the background update. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is forgotten + forgotten=True, + ), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ), + ) + + def test_membership_snapshots_background_update_forgotten_partial(self) -> None: + """ + Test an existing `sliding_sync_membership_snapshots` row is updated with the + latest `forgotten` status after the background update passes over it. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Clean-up the `sliding_sync_joined_rooms` table as if the forgotten status + # never made it into the table. + self.get_success( + self.store.db_pool.simple_update( + table="sliding_sync_membership_snapshots", + keyvalues={"room_id": room_id}, + updatevalues={"forgotten": 0}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_partial", + ) + ) + + # We should see the partial row that we made in preparation for the test. 
+ sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is *not* forgotten because of our test preparation + forgotten=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Make sure the table is populated + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Forgotten status is now updated + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + attr.evolve(user1_snapshot, forgotten=True), + ) + # Holds the info according to the current state when the user joined + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_membership_snapshot_forget(self) -> None: + """ + Test forgetting a room will update `sliding_sync_membership_snapshots` + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Check on the `sliding_sync_membership_snapshots` table (nothing should be + # forgotten yet) + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + 
user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is not forgotten + forgotten=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Check on the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Room is now forgotten for user1 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + attr.evolve(user1_snapshot, forgotten=True), + ) + # Nothing changed for user2 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_membership_snapshot_missing_forget( + self, + ) -> None: + """ + Test forgetting a room with no existing row in `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. 
+ sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # It doesn't explode + + # We still shouldn't find anything in the table because nothing has re-created them + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) From 6edc4c78ce5c7f4abbc27e68df8cf2038520d625 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 17:09:43 -0500 Subject: [PATCH 096/142] Allow for no `bump_stamp` (fix `portdb` CI job) See https://github.com/element-hq/synapse/pull/17512#discussion_r1725998219 --- .../databases/main/events_bg_updates.py | 31 +++++++++++++------ tests/storage/test_events.py | 1 - 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index c3ee2952d60..83115831256 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1580,7 +1580,9 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: # Map from room_id to insert/update state values in the `sliding_sync_joined_rooms` table joined_room_updates: Dict[str, SlidingSyncStateInsertValues] = {} # Map from room_id to stream_ordering/bump_stamp/last_current_state_delta_stream_id values - joined_room_stream_ordering_updates: Dict[str, Tuple[int, int, int]] = {} + joined_room_stream_ordering_updates: Dict[ + str, Tuple[int, Optional[int], int] + ] = {} for room_id in rooms_to_update: current_state_ids_map, last_current_state_delta_stream_id = ( await self.db_pool.runInteraction( @@ -1613,21 +1615,32 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: most_recent_event_pos_results = await self.get_last_event_pos_in_room( room_id, event_types=None ) - assert most_recent_event_pos_results, ( + assert most_recent_event_pos_results is not None, ( f"We should not be seeing `None` here because the room ({room_id}) should at-least have a create event " + "given we pulled the room out of `current_state_events`" ) - # Figure out the latest bump_stamp in the room + most_recent_event_stream_ordering = most_recent_event_pos_results[1].stream + assert most_recent_event_stream_ordering > 0, ( + "We should have at-least one event in the room (our own join membership event for example) " + + "that isn't backfilled (negative `stream_ordering`) if we are joined to the room." + ) + # Figure out the latest bump_stamp in the room. This could be `None` for a + # federated room you just joined where all of events are still `outliers` or + # backfilled history. In the Sliding Sync API, we default to the user's + # membership event `stream_ordering` if we don't have a `bump_stamp`. 
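(Editorial sketch, not part of the patch: the read-side fallback that the comment above alludes to. The function and variable names below are illustrative assumptions, not actual Synapse code.)

def effective_bump_stamp(stored_bump_stamp, membership_event_stream_ordering):
    # If the background update stored NULL (e.g. a freshly joined federated room
    # where every event so far is an outlier or backfilled), fall back to the
    # user's own membership position so the room can still be sorted.
    if stored_bump_stamp is not None:
        return stored_bump_stamp
    return membership_event_stream_ordering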
bump_stamp_event_pos_results = await self.get_last_event_pos_in_room( room_id, event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES ) - assert bump_stamp_event_pos_results, ( - f"We should not be seeing `None` here because the room ({room_id}) should at-least have a create event " - + "(unless `SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES` no longer includes the room create event)" - ) + most_recent_bump_stamp = None + if ( + bump_stamp_event_pos_results is not None + and bump_stamp_event_pos_results[1].stream > 0 + ): + most_recent_bump_stamp = bump_stamp_event_pos_results[1].stream + joined_room_stream_ordering_updates[room_id] = ( - most_recent_event_pos_results[1].stream, - bump_stamp_event_pos_results[1].stream, + most_recent_event_stream_ordering, + most_recent_bump_stamp, last_current_state_delta_stream_id, ) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 7cc1367f94d..cb3d8e19bc3 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -22,7 +22,6 @@ import logging from typing import List, Optional - from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes, Membership From 0726a6d58b77fcfb8b03511ec21b1c6599b1d942 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 18:13:00 -0500 Subject: [PATCH 097/142] Derive best effort `stream_ordering` outside of the transaction See https://github.com/element-hq/synapse/pull/17512#discussion_r1727995882 --- synapse/storage/databases/main/events.py | 159 ++++++++++++++--------- 1 file changed, 94 insertions(+), 65 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a4a573dd1ac..b8ad60194c1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -169,6 +169,17 @@ class SlidingSyncMembershipInfo: @attr.s(slots=True, auto_attribs=True) class SlidingSyncTableChanges: room_id: str + # `stream_ordering` of the most recent event being persisted in the room. This doesn't + # need to be perfect, we just need *some* answer that points to a real event in the + # room in case we are the first ones inserting into the `sliding_sync_joined_rooms` + # table because of the `NON NULL` constraint on `event_stream_ordering`. In reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run after + # `_update_current_state_txn()` whenever a new event is persisted to update it to the + # correct latest value. + # + # This should be *some* value that points to a real event in the room if we are + # still joined to the room. + joined_room_best_effort_most_recent_stream_ordering: Optional[int] # Values to upsert into `sliding_sync_joined_rooms` joined_room_updates: SlidingSyncStateInsertValues @@ -374,7 +385,9 @@ async def _calculate_sliding_sync_table_changes( events_and_contexts: List of tuples of (event, context) being persisted. This is completely optional (you can pass an empty list) and will just save us from fetching the events from the database if we already have - them. + them. We assume the list is sorted ascending by `stream_ordering`. We + don't care about the sort when the events are backfilled (with negative + `stream_ordering`). delta_state: Deltas that are going to be used to update the `current_state_events` table. Changes to the current state of the room. 
""" @@ -526,6 +539,7 @@ async def _calculate_sliding_sync_table_changes( # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # joined_room_updates: SlidingSyncStateInsertValues = {} + best_effort_most_recent_stream_ordering: Optional[int] = None if not delta_state.no_longer_in_room: # Look through the items we're going to insert into the current state to see # if there is anything that we care about and should also update in the @@ -576,9 +590,57 @@ async def _calculate_sliding_sync_table_changes( elif state_key == (EventTypes.Name, ""): joined_room_updates["room_name"] = None + # Figure out `best_effort_most_recent_stream_ordering`. This doesn't need to + # be perfect, we just need *some* answer that points to a real event in the + # room in case we are the first ones inserting into the + # `sliding_sync_joined_rooms` table because of the `NON NULL` constraint on + # `event_stream_ordering`. In reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` is run after + # `_update_current_state_txn()` whenever a new event is persisted to update + # it to the correct latest value. + # + if len(events_and_contexts) > 0: + # Since the list is sorted ascending by `stream_ordering`, the last event + # should have the highest `stream_ordering`. + best_effort_most_recent_stream_ordering = events_and_contexts[-1][ + 0 + ].internal_metadata.stream_ordering + elif to_insert: + # Even though `Mapping`/`Dict` have no guaranteed order, some + # implementations may preserve insertion order so we're just + # going to choose the best possible answer by using the "first" + # event ID which we will assume will have the greatest + # `stream_ordering`. We really just need *some* answer in case + # we are the first ones inserting into the table because of the + # `NON NULL` constraint on `event_stream_ordering`. In reality, + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` + # is run after this function to update it to the correct latest + # value. + event_id = next(iter(to_insert.values())) + event_pos = await self.store.get_position_for_event(event_id) + best_effort_most_recent_stream_ordering = event_pos.stream + + else: + most_recent_event_pos_results = ( + await self.store.get_last_event_pos_in_room( + room_id, event_types=None + ) + ) + assert most_recent_event_pos_results, ( + f"We should not be seeing `None` here because we are still in the room ({room_id}) and " + + "it should at-least have a create event." + ) + best_effort_most_recent_stream_ordering = most_recent_event_pos_results[ + 1 + ].stream + + # We should have found a value if we are still in the room + assert best_effort_most_recent_stream_ordering is not None + return SlidingSyncTableChanges( room_id=room_id, # For `sliding_sync_joined_rooms` + joined_room_best_effort_most_recent_stream_ordering=best_effort_most_recent_stream_ordering, joined_room_updates=joined_room_updates, # For `sliding_sync_membership_snapshots` membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, @@ -1655,72 +1717,39 @@ def _update_current_state_txn( ) # We only need to update when one of the relevant state values has changed if sliding_sync_updates_keys: - # If we have some `to_insert` values, we can use the standard upsert - # pattern because we have access to an `event_id` to use for the - # `event_stream_ordering` which has a `NON NULL` constraint. 
- if to_insert: - args: List[Any] = [ - room_id, - # XXX: We can't use `stream_id` for the `event_stream_ordering` - # here because we have a foreign key constraint on - # `event_stream_ordering` that it should point to a valid event. - # When re-syncing the state of a partial-state room, `stream_id` - # is set to the next possible stream position for a future event - # that doesn't exist yet. - # - # Even though `Mapping`/`Dict` have no guaranteed order, some - # implementations may preserve insertion order so we're just - # going to choose the best possible answer by using the "first" - # event ID which we will assume will have the greatest - # `stream_ordering`. We really just need *some* answer in case - # we are the first ones inserting into the table because of the - # `NON NULL` constraint on `event_stream_ordering`. In reality, - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` - # is run after this function to update it to the correct latest - # value. - next(iter(to_insert.values())), - ] - - args.extend(iter(sliding_sync_updates_values)) - - # We don't update `event_stream_ordering` `ON CONFLICT` because it's - # simpler and we can just rely on - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to - # do the right thing (same for `bump_stamp`). The only reason we're - # inserting `event_stream_ordering` here is because the column has a - # `NON NULL` constraint and we need some answer. - txn.execute( - f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {", ".join(sliding_sync_updates_keys)}) - VALUES ( - ?, - (SELECT stream_ordering FROM events WHERE event_id = ?), - {", ".join("?" for _ in sliding_sync_updates_values)} - ) - ON CONFLICT (room_id) - DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_updates_keys)} - """, - args, - ) + # This should be *some* value that points to a real event in the room if + # we are still joined to the room. + assert ( + sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering + is not None + ) - # If there are only values `to_delete`, we have to use an `UPDATE` - # instead because there is no `event_id` to use for the `NON NULL` - # constraint on `event_stream_ordering`. - elif to_delete: - num_rows_updated = self.db_pool.simple_update_txn( - txn, - table="sliding_sync_joined_rooms", - keyvalues={ - "room_id": room_id, - }, - updatevalues=sliding_sync_table_changes.joined_room_updates, + args: List[Any] = [ + room_id, + sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering, + ] + args.extend(iter(sliding_sync_updates_values)) + + # We don't update `event_stream_ordering` `ON CONFLICT` because it's + # simpler and we can just rely on + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to + # do the right thing (same for `bump_stamp`). The only reason we're + # inserting `event_stream_ordering` here is because the column has a + # `NON NULL` constraint and we need some answer. + txn.execute( + f""" + INSERT INTO sliding_sync_joined_rooms + (room_id, event_stream_ordering, {", ".join(sliding_sync_updates_keys)}) + VALUES ( + ?, ?, + {", ".join("?" for _ in sliding_sync_updates_values)} ) - # TODO: Is this assumption correct? - assert ( - num_rows_updated > 0 - ), "Expected to only run this against existing rows" + ON CONFLICT (room_id) + DO UPDATE SET + {", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_updates_keys)} + """, + args, + ) # We now update `local_current_membership`. 
We do this regardless # of whether we're still in the room or not to handle the case where From 088a4c7cf0390a6d2a4359807ec80b4ad357c2e6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 18:26:37 -0500 Subject: [PATCH 098/142] Use `simple_upsert_txn` to update `sliding_sync_joined_rooms` See https://github.com/element-hq/synapse/pull/17512#discussion_r1726817206 --- synapse/storage/database.py | 18 ++++----- synapse/storage/databases/main/events.py | 50 ++++++++---------------- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 66a7238debb..ba2616b4795 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1255,9 +1255,9 @@ def simple_upsert_txn( self, txn: LoggingTransaction, table: str, - keyvalues: Dict[str, Any], - values: Dict[str, Any], - insertion_values: Optional[Dict[str, Any]] = None, + keyvalues: Mapping[str, Any], + values: Mapping[str, Any], + insertion_values: Optional[Mapping[str, Any]] = None, where_clause: Optional[str] = None, ) -> bool: """ @@ -1300,9 +1300,9 @@ def simple_upsert_txn_emulated( self, txn: LoggingTransaction, table: str, - keyvalues: Dict[str, Any], - values: Dict[str, Any], - insertion_values: Optional[Dict[str, Any]] = None, + keyvalues: Mapping[str, Any], + values: Mapping[str, Any], + insertion_values: Optional[Mapping[str, Any]] = None, where_clause: Optional[str] = None, lock: bool = True, ) -> bool: @@ -1381,9 +1381,9 @@ def simple_upsert_txn_native_upsert( self, txn: LoggingTransaction, table: str, - keyvalues: Dict[str, Any], - values: Dict[str, Any], - insertion_values: Optional[Dict[str, Any]] = None, + keyvalues: Mapping[str, Any], + values: Mapping[str, Any], + insertion_values: Optional[Mapping[str, Any]] = None, where_clause: Optional[str] = None, ) -> bool: """ diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b8ad60194c1..bc190035440 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1708,15 +1708,8 @@ def _update_current_state_txn( # persisting stack (see # `_update_sliding_sync_tables_with_new_persisted_events_txn()`) # - # Pulling keys/values separately is safe and will produce congruent lists - sliding_sync_updates_keys = ( - sliding_sync_table_changes.joined_room_updates.keys() - ) - sliding_sync_updates_values = ( - sliding_sync_table_changes.joined_room_updates.values() - ) # We only need to update when one of the relevant state values has changed - if sliding_sync_updates_keys: + if sliding_sync_table_changes.joined_room_updates: # This should be *some* value that points to a real event in the room if # we are still joined to the room. assert ( @@ -1724,31 +1717,22 @@ def _update_current_state_txn( is not None ) - args: List[Any] = [ - room_id, - sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering, - ] - args.extend(iter(sliding_sync_updates_values)) - - # We don't update `event_stream_ordering` `ON CONFLICT` because it's - # simpler and we can just rely on - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` to - # do the right thing (same for `bump_stamp`). The only reason we're - # inserting `event_stream_ordering` here is because the column has a - # `NON NULL` constraint and we need some answer. 
- txn.execute( - f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, {", ".join(sliding_sync_updates_keys)}) - VALUES ( - ?, ?, - {", ".join("?" for _ in sliding_sync_updates_values)} - ) - ON CONFLICT (room_id) - DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_updates_keys)} - """, - args, + self.db_pool.simple_upsert_txn( + txn, + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + values=sliding_sync_table_changes.joined_room_updates, + insertion_values={ + # The reason we're only *inserting* `event_stream_ordering` here + # is because the column has a `NON NULL` constraint and we need + # *some* answer. If the row already exists, we are trying to + # avoid doing an `UPDATE` and accidentally overwriting the value + # with some stale data since this is just a "best effort" value. + # It's better to just rely on + # `_update_sliding_sync_tables_with_new_persisted_events_txn()` + # to do the right thing (same for `bump_stamp`). + "event_stream_ordering": sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering + }, ) # We now update `local_current_membership`. We do this regardless From 4b866c4fcad9238d8f876be4d53b04a3e43da2c3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 18:36:43 -0500 Subject: [PATCH 099/142] Simplify what we need to think about to grab the best effort value --- synapse/storage/databases/main/events.py | 25 ++++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index bc190035440..06ae00f0bca 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -605,22 +605,17 @@ async def _calculate_sliding_sync_table_changes( best_effort_most_recent_stream_ordering = events_and_contexts[-1][ 0 ].internal_metadata.stream_ordering - elif to_insert: - # Even though `Mapping`/`Dict` have no guaranteed order, some - # implementations may preserve insertion order so we're just - # going to choose the best possible answer by using the "first" - # event ID which we will assume will have the greatest - # `stream_ordering`. We really just need *some* answer in case - # we are the first ones inserting into the table because of the - # `NON NULL` constraint on `event_stream_ordering`. In reality, - # `_update_sliding_sync_tables_with_new_persisted_events_txn()` - # is run after this function to update it to the correct latest - # value. - event_id = next(iter(to_insert.values())) - event_pos = await self.store.get_position_for_event(event_id) - best_effort_most_recent_stream_ordering = event_pos.stream - else: + # If there are no `events_and_contexts`, we assume it's one of two scenarios: + # 1. If there are new state `to_insert` but no `events_and_contexts`, + # then it's a state reset. + # 2. Otherwise, it's some partial-state room re-syncing the current state and + # going through un-partial process. + # + # Either way, we assume no new events are being persisted and we can + # find the latest already in the database. Since this is a best-effort + # value, we don't need to be perfect although I think we're pretty close + # here. 
most_recent_event_pos_results = ( await self.store.get_last_event_pos_in_room( room_id, event_types=None From fdb8b5931feea2bf8436361f25d24b4869c884e9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 18:43:51 -0500 Subject: [PATCH 100/142] Correct comment --- synapse/storage/databases/main/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 06ae00f0bca..e1a454b07e5 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -623,7 +623,7 @@ async def _calculate_sliding_sync_table_changes( ) assert most_recent_event_pos_results, ( f"We should not be seeing `None` here because we are still in the room ({room_id}) and " - + "it should at-least have a create event." + + "it should at-least have a join membership event that's keeping us here." ) best_effort_most_recent_stream_ordering = most_recent_event_pos_results[ 1 From 21cc97ba9da6fadd5fb9d7d3f237073eaad6f239 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 18:52:05 -0500 Subject: [PATCH 101/142] Use `simple_upsert_many_txn` for `sliding_sync_membership_snapshots` See https://github.com/element-hq/synapse/pull/17512#discussion_r1726820006 --- synapse/storage/databases/main/events.py | 50 ++++++++++-------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index e1a454b07e5..3b2a6783c0e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1784,44 +1784,36 @@ def _update_current_state_txn( if sliding_sync_table_changes.to_insert_membership_snapshots: # Update the `sliding_sync_membership_snapshots` table # - # Pulling keys/values separately is safe and will produce congruent - # lists - sliding_sync_snapshot_keys = ( - sliding_sync_table_changes.membership_snapshot_shared_insert_values.keys() - ) - sliding_sync_snapshot_values = ( - sliding_sync_table_changes.membership_snapshot_shared_insert_values.values() - ) # We need to insert/update regardless of whether we have `sliding_sync_snapshot_keys` # because there are other fields in the `ON CONFLICT` upsert to run (see # inherit case above for more context when this happens). - txn.execute_batch( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, sender, membership_event_id, membership, event_stream_ordering - {("," + ", ".join(sliding_sync_snapshot_keys)) if sliding_sync_snapshot_keys else ""}) - VALUES ( - ?, ?, ?, ?, ?, ? - {("," + ", ".join("?" 
for _ in sliding_sync_snapshot_values)) if sliding_sync_snapshot_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO UPDATE SET - sender = EXCLUDED.sender, - membership_event_id = EXCLUDED.membership_event_id, - membership = EXCLUDED.membership, - event_stream_ordering = EXCLUDED.event_stream_ordering - {("," + ", ".join(f"{key} = EXCLUDED.{key}" for key in sliding_sync_snapshot_keys)) if sliding_sync_snapshot_keys else ""} - """, - [ + self.db_pool.simple_upsert_many_txn( + txn=txn, + table="sliding_sync_membership_snapshots", + key_names=("room_id", "user_id"), + key_values=[ + (room_id, membership_info.user_id) + for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots + ], + value_names=[ + "sender", + "membership_event_id", + "membership", + "event_stream_ordering", + ] + + list( + sliding_sync_table_changes.membership_snapshot_shared_insert_values.keys() + ), + value_values=[ [ - room_id, - membership_info.user_id, membership_info.sender, membership_info.membership_event_id, membership_info.membership, membership_info.membership_event_stream_ordering, ] - + list(sliding_sync_snapshot_values) + + list( + sliding_sync_table_changes.membership_snapshot_shared_insert_values.values() + ) for membership_info in sliding_sync_table_changes.to_insert_membership_snapshots ], ) From f8926d07df2654cea7970c16145dcc0f041e6fa2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 19:06:04 -0500 Subject: [PATCH 102/142] Fix partial-stated room re-syncing state but nothing has changed Fixes failing test in CI: `tests.handlers.test_federation.PartialJoinTestCase.test_failed_partial_join_is_clean` ``` 2024-08-22 18:57:22-0500 [-] synapse.metrics.background_process_metrics - 253 - ERROR - sync_partial_state_room-0 - Background process 'sync_partial_state_room' threw an exception Traceback (most recent call last): File "synapse/synapse/metrics/background_process_metrics.py", line 251, in run return await func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "synapse/synapse/handlers/federation.py", line 1842, in _sync_partial_state_room_wrapper await self._sync_partial_state_room( File "synapse/synapse/handlers/federation.py", line 1933, in _sync_partial_state_room await self.state_handler.update_current_state(room_id) File "synapse/synapse/state/__init__.py", line 554, in update_current_state await self._storage_controllers.persistence.update_current_state(room_id) File "synapse/synapse/storage/controllers/persist_events.py", line 491, in update_current_state await self._event_persist_queue.add_to_queue( File "synapse/synapse/storage/controllers/persist_events.py", line 245, in add_to_queue res = await make_deferred_yieldable(end_item.deferred.observe()) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "synapse/synapse/storage/controllers/persist_events.py", line 288, in handle_queue_loop ret = await self._per_item_callback(room_id, item.task) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "synapse/synapse/storage/controllers/persist_events.py", line 370, in _process_event_persist_queue_task await self._update_current_state(room_id, task) File "synapse/synapse/storage/controllers/persist_events.py", line 507, in _update_current_state await self.persist_events_store._calculate_sliding_sync_table_changes( File "synapse/synapse/storage/databases/main/events.py", line 624, in _calculate_sliding_sync_table_changes assert most_recent_event_pos_results, ( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ AssertionError: We should not be seeing `None` here because 
we are still in the room (!room:example.com) and it should at-least have a join membership event that's keeping us here. ``` --- synapse/storage/databases/main/events.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 3b2a6783c0e..c4188b087da 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -178,7 +178,7 @@ class SlidingSyncTableChanges: # correct latest value. # # This should be *some* value that points to a real event in the room if we are - # still joined to the room. + # still joined to the room and some state is changing (`to_insert` or `to_delete`). joined_room_best_effort_most_recent_stream_ordering: Optional[int] # Values to upsert into `sliding_sync_joined_rooms` joined_room_updates: SlidingSyncStateInsertValues @@ -394,6 +394,18 @@ async def _calculate_sliding_sync_table_changes( to_insert = delta_state.to_insert to_delete = delta_state.to_delete + # If no state is changing, we don't need to do anything. This can happen when a + # partial-stated room is re-syncing the current state. + if not to_insert and not to_delete: + return SlidingSyncTableChanges( + room_id=room_id, + joined_room_best_effort_most_recent_stream_ordering=None, + joined_room_updates={}, + membership_snapshot_shared_insert_values={}, + to_insert_membership_snapshots=[], + to_delete_membership_snapshots=[], + ) + event_map = {event.event_id: event for event, _ in events_and_contexts} # Handle gathering info for the `sliding_sync_membership_snapshots` table From b6a7d2bf6ccce65c8b4095ca8c2bfe7f86e24e03 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 23:30:00 -0500 Subject: [PATCH 103/142] Use `simple_upsert_txn` for `sliding_sync_joined_rooms` in background update --- synapse/storage/databases/main/events.py | 11 +++---- .../databases/main/events_bg_updates.py | 33 +++++++------------ 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c4188b087da..fa41d339205 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1730,12 +1730,11 @@ def _update_current_state_txn( keyvalues={"room_id": room_id}, values=sliding_sync_table_changes.joined_room_updates, insertion_values={ - # The reason we're only *inserting* `event_stream_ordering` here - # is because the column has a `NON NULL` constraint and we need - # *some* answer. If the row already exists, we are trying to - # avoid doing an `UPDATE` and accidentally overwriting the value - # with some stale data since this is just a "best effort" value. - # It's better to just rely on + # The reason we're only *inserting* (not *updating*) + # `event_stream_ordering` here is because the column has a `NON + # NULL` constraint and we need *some* answer. And if the row + # already exists, it already has the correct value and it's + # better to just rely on # `_update_sliding_sync_tables_with_new_persisted_events_txn()` # to do the right thing (same for `bump_stamp`). 
"event_stream_ordering": sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 83115831256..5e6768d2cec 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1691,33 +1691,24 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: + "Raising exception so we can just try again." ) - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = insert_map.keys() - insert_values = insert_map.values() # Since we partially update the `sliding_sync_joined_rooms` as new state # is sent, we need to update the state fields `ON CONFLICT`. We just # have to be careful we're not overwriting it with stale data (see # `last_current_state_delta_stream_id` check above). # - # We don't need to update `event_stream_ordering` and `bump_stamp` `ON - # CONFLICT` because if they are present, that means they are already - # up-to-date. - sql = f""" - INSERT INTO sliding_sync_joined_rooms - (room_id, event_stream_ordering, bump_stamp, {", ".join(insert_keys)}) - VALUES ( - ?, ?, ?, - {", ".join("?" for _ in insert_values)} - ) - ON CONFLICT (room_id) - DO UPDATE SET - {", ".join(f"{key} = EXCLUDED.{key}" for key in insert_keys)} - """ - args = [room_id, event_stream_ordering, bump_stamp] + list( - insert_values + self.db_pool.simple_upsert_txn( + txn, + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + values=insert_map, + insertion_values={ + # The reason we're only *inserting* (not *updating*) `event_stream_ordering` + # and `bump_stamp` is because if they are present, that means they are already + # up-to-date. + "event_stream_ordering": event_stream_ordering, + "bump_stamp": bump_stamp, + }, ) - txn.execute(sql, args) # Keep track of the last successful room_id last_successful_room_id = room_id From a57d47b778c69b7c021b8a641e5a01d64f5f6131 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Aug 2024 23:59:06 -0500 Subject: [PATCH 104/142] Use `simple_upsert_txn` for `sliding_sync_membership_snapshots` in background update --- .../databases/main/events_bg_updates.py | 57 +++++++++---------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 5e6768d2cec..ba0b97ae2ab 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1982,42 +1982,41 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: membership_info.membership_event_stream_ordering ) - # Pulling keys/values separately is safe and will produce congruent - # lists - insert_keys = insert_map.keys() - insert_values = insert_map.values() - # We don't need to update the state `ON CONFLICT` because we never - # partially insert/update the snapshots and anything already there is - # up-to-date EXCEPT for the `forgotten` field since that is updated out - # of band from the membership changes. + # We don't need to upsert the state because we never partially + # insert/update the snapshots and anything already there is up-to-date + # EXCEPT for the `forgotten` field since that is updated out-of-band + # from the membership changes. 
# + # Even though we're only doing insertions, we're using + # `simple_upsert_txn()` here to avoid unique violation errors that would + # happen from `simple_insert_txn()` + self.db_pool.simple_upsert_txn( + txn, + table="sliding_sync_membership_snapshots", + keyvalues={"room_id": room_id, "user_id": user_id}, + values={}, + insertion_values={ + **insert_map, + "sender": sender, + "membership_event_id": membership_event_id, + "membership": membership, + "event_stream_ordering": membership_event_stream_ordering, + }, + ) # We need to find the `forgotten` value during the transaction because # we can't risk inserting stale data. txn.execute( - f""" - INSERT INTO sliding_sync_membership_snapshots - (room_id, user_id, sender, membership_event_id, membership, forgotten, event_stream_ordering - {("," + ", ".join(insert_keys)) if insert_keys else ""}) - VALUES ( - ?, ?, ?, ?, ?, - (SELECT forgotten FROM room_memberships WHERE event_id = ?), - ? - {("," + ", ".join("?" for _ in insert_values)) if insert_values else ""} - ) - ON CONFLICT (room_id, user_id) - DO UPDATE SET - forgotten = EXCLUDED.forgotten + """ + UPDATE sliding_sync_membership_snapshots + SET + forgotten = (SELECT forgotten FROM room_memberships WHERE event_id = ?) + WHERE room_id = ? and user_id = ? """, - [ + ( + membership_event_id, room_id, user_id, - sender, - membership_event_id, - membership, - membership_event_id, - membership_event_stream_ordering, - ] - + list(insert_values), + ), ) await self.db_pool.runInteraction( From 9795556052e5fe269f2164096e191fb941b44cd0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 14:42:55 -0500 Subject: [PATCH 105/142] Update comment --- synapse/storage/databases/main/events_bg_updates.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index ba0b97ae2ab..af0a1ba8805 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1624,10 +1624,11 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: "We should have at-least one event in the room (our own join membership event for example) " + "that isn't backfilled (negative `stream_ordering`) if we are joined to the room." ) - # Figure out the latest bump_stamp in the room. This could be `None` for a + # Figure out the latest `bump_stamp` in the room. This could be `None` for a # federated room you just joined where all of events are still `outliers` or # backfilled history. In the Sliding Sync API, we default to the user's - # membership event `stream_ordering` if we don't have a `bump_stamp`. + # membership event `stream_ordering` if we don't have a `bump_stamp` so + # having it as `None` in this table is fine. 
bump_stamp_event_pos_results = await self.get_last_event_pos_in_room( room_id, event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES ) From addb91485f3475649be8e9c01867d4834e41be07 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 16:11:56 -0500 Subject: [PATCH 106/142] Split test cases --- tests/storage/test_sliding_sync_tables.py | 359 +++++++++++----------- 1 file changed, 187 insertions(+), 172 deletions(-) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 34f42b6fd40..fb2340b446b 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -81,9 +81,9 @@ class _SlidingSyncMembershipSnapshotResult: forgotten: bool = False -class SlidingSyncPrePopulatedTablesTestCase(HomeserverTestCase): +class SlidingSyncTablesTestCaseBase(HomeserverTestCase): """ - Tests to make sure the + Helpers to deal with testing that the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` database tables are populated correctly. """ @@ -319,6 +319,14 @@ def _retract_remote_invite_for_user( return persisted_event + +class SlidingSyncTablesTestCase(SlidingSyncTablesTestCaseBase): + """ + Tests to make sure the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` database tables are + populated and updated correctly as new events are sent. + """ + def test_joined_room_with_no_info(self) -> None: """ Test joined room that doesn't have a room type, encryption, or name shows up in @@ -2407,6 +2415,183 @@ def test_non_join_state_reset(self) -> None: user2_snapshot, ) + def test_membership_snapshot_forget(self) -> None: + """ + Test forgetting a room will update `sliding_sync_membership_snapshots` + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + + # Check on the `sliding_sync_membership_snapshots` table (nothing should be + # forgotten yet) + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Holds the info according to the current state when the user joined + user1_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user1_id, + sender=user1_id, + membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, + membership=Membership.LEAVE, + event_stream_ordering=state_map[ + (EventTypes.Member, user1_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + # Room is not forgotten + forgotten=False, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + user1_snapshot, + ) + # Holds the info according to the current state when the user joined + user2_snapshot = _SlidingSyncMembershipSnapshotResult( + room_id=room_id, + user_id=user2_id, + sender=user2_id, + 
membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, + membership=Membership.JOIN, + event_stream_ordering=state_map[ + (EventTypes.Member, user2_id) + ].internal_metadata.stream_ordering, + has_known_state=True, + room_type=None, + room_name=None, + is_encrypted=False, + tombstone_successor_room_id=None, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Check on the `sliding_sync_membership_snapshots` table + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Room is now forgotten for user1 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + attr.evolve(user1_snapshot, forgotten=True), + ) + # Nothing changed for user2 + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + user2_snapshot, + ) + + def test_membership_snapshot_missing_forget( + self, + ) -> None: + """ + Test forgetting a room with no existing row in `sliding_sync_membership_snapshots`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 joins the room + self.helper.join(room_id, user1_id, tok=user1_tok) + # User1 leaves the room (we have to leave in order to forget the room) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not + # happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Forget the room + channel = self.make_request( + "POST", + f"/_matrix/client/r0/rooms/{room_id}/forget", + content={}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # It doesn't explode + + # We still shouldn't find anything in the table because nothing has re-created them + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + +class SlidingSyncTablesBackgroundUpdatesTestCase(SlidingSyncTablesTestCaseBase): + """ + Test the background updates that populate the `sliding_sync_joined_rooms` and + `sliding_sync_membership_snapshots` tables. 
+ """ + def test_joined_background_update_missing(self) -> None: """ Test that the background update for `sliding_sync_joined_rooms` populates missing rows @@ -3987,173 +4172,3 @@ def test_membership_snapshots_background_update_forgotten_partial(self) -> None: sliding_sync_membership_snapshots_results.get((room_id, user2_id)), user2_snapshot, ) - - def test_membership_snapshot_forget(self) -> None: - """ - Test forgetting a room will update `sliding_sync_membership_snapshots` - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id) - ) - - # Check on the `sliding_sync_membership_snapshots` table (nothing should be - # forgotten yet) - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Holds the info according to the current state when the user joined - user1_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user1_id, - sender=user1_id, - membership_event_id=state_map[(EventTypes.Member, user1_id)].event_id, - membership=Membership.LEAVE, - event_stream_ordering=state_map[ - (EventTypes.Member, user1_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - # Room is not forgotten - forgotten=False, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - user1_snapshot, - ) - # Holds the info according to the current state when the user joined - user2_snapshot = _SlidingSyncMembershipSnapshotResult( - room_id=room_id, - user_id=user2_id, - sender=user2_id, - membership_event_id=state_map[(EventTypes.Member, user2_id)].event_id, - membership=Membership.JOIN, - event_stream_ordering=state_map[ - (EventTypes.Member, user2_id) - ].internal_metadata.stream_ordering, - has_known_state=True, - room_type=None, - room_name=None, - is_encrypted=False, - tombstone_successor_room_id=None, - ) - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # Check on the `sliding_sync_membership_snapshots` table - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - { - (room_id, user1_id), - (room_id, user2_id), - }, - exact=True, - ) - # Room is now forgotten for user1 - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user1_id)), - attr.evolve(user1_snapshot, forgotten=True), - ) - # Nothing changed for user2 - self.assertEqual( - sliding_sync_membership_snapshots_results.get((room_id, user2_id)), - user2_snapshot, - ) - - def 
test_membership_snapshot_missing_forget( - self, - ) -> None: - """ - Test forgetting a room with no existing row in `sliding_sync_membership_snapshots`. - """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - - # User1 joins the room - self.helper.join(room_id, user1_id, tok=user1_tok) - # User1 leaves the room (we have to leave in order to forget the room) - self.helper.leave(room_id, user1_id, tok=user1_tok) - - # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not - # happen during event creation. - self.get_success( - self.store.db_pool.simple_delete_many( - table="sliding_sync_membership_snapshots", - column="room_id", - iterable=(room_id,), - keyvalues={}, - desc="sliding_sync_membership_snapshots.test_membership_snapshots_background_update_forgotten_missing", - ) - ) - - # We shouldn't find anything in the table because we just deleted them in - # preparation for the test. - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) - - # Forget the room - channel = self.make_request( - "POST", - f"/_matrix/client/r0/rooms/{room_id}/forget", - content={}, - access_token=user1_tok, - ) - self.assertEqual(channel.code, 200, channel.result) - - # It doesn't explode - - # We still shouldn't find anything in the table because nothing has re-created them - sliding_sync_membership_snapshots_results = ( - self._get_sliding_sync_membership_snapshots() - ) - self.assertIncludes( - set(sliding_sync_membership_snapshots_results.keys()), - set(), - exact=True, - ) From 8bddbe23bda8073ef6ac662e034cb0af488167df Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 16:19:47 -0500 Subject: [PATCH 107/142] Clear out-of-date rows --- synapse/storage/prepare_database.py | 162 +++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index aaffe5ecc9e..2b323c0272d 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -37,10 +37,15 @@ import attr from synapse.config.homeserver import HomeServerConfig -from synapse.storage.database import LoggingDatabaseConnection, LoggingTransaction +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor +from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) @@ -567,6 +572,161 @@ def _upgrade_existing_database( logger.info("Schema now up to date") + # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + # foreground update for + # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + # https://github.com/element-hq/synapse/issues/TODO) + _clear_out_of_date_sliding_sync_tables( + txn=cur, + ) + + +def _clear_out_of_date_sliding_sync_tables( + txn: LoggingTransaction, +) -> None: + """ + Clears out-of-date entries from the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. 
+ + This accounts for when someone downgrades their Synapse version and then upgrades it + again. This will ensure that we don't have any out-of-date data in the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/TODO) + """ + + _clear_out_of_date_sliding_sync_joined_rooms_table(txn) + _clear_out_of_date_sliding_sync_membership_snapshots_table(txn) + + +def _clear_out_of_date_sliding_sync_joined_rooms_table( + txn: LoggingTransaction, +) -> None: + """ + Clears out-of-date entries from the `sliding_sync_joined_rooms` table. + + This accounts for when someone downgrades their Synapse version and then upgrades it + again. This will ensure that we don't have any out-of-date data in the + `sliding_sync_joined_rooms` table. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/TODO) + """ + + # Find the point when we stopped writing to the `sliding_sync_joined_rooms` table + txn.execute( + """ + SELECT event_stream_ordering + FROM sliding_sync_joined_rooms + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ) + + row = txn.fetchone() + # We have nothing written to the `sliding_sync_joined_rooms` table so there is + # nothing to clean up + if row is None: + return + + max_stream_ordering_sliding_sync_joined_rooms_table = row[0] + + txn.execute( + """ + SELECT DISTINCT(room_id) + FROM events + WHERE stream_ordering > ? + ORDER BY stream_ordering DESC + """, + (max_stream_ordering_sliding_sync_joined_rooms_table,), + ) + + room_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_joined_rooms` table so there is nothing to clean up. This is + # the expected normal scenario for people who have not downgraded their Synapse + # version. + if not room_rows: + return + + for chunk in batch_iter(room_rows, 1000): + # Handle updating the `sliding_sync_joined_rooms` table + # + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_joined_rooms", + keys=("room_id",), + values=chunk, + ) + + +def _clear_out_of_date_sliding_sync_membership_snapshots_table( + txn: LoggingTransaction, +) -> None: + """ + Clears out-of-date entries from the `sliding_sync_membership_snapshots` table. + + This accounts for when someone downgrades their Synapse version and then upgrades it + again. This will ensure that we don't have any out-of-date data in the + `sliding_sync_membership_snapshots` table. 
+ + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/TODO) + """ + + # Find the point when we stopped writing to the `sliding_sync_membership_snapshots` table + txn.execute( + """ + SELECT event_stream_ordering + FROM sliding_sync_membership_snapshots + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ) + + row = txn.fetchone() + # We have nothing written to the `sliding_sync_membership_snapshots` table so there is + # nothing to clean up + if row is None: + return + + max_stream_ordering_sliding_sync_membership_snapshots_table = row[0] + + txn.execute( + """ + SELECT DISTINCT(user_id, room_id) + FROM room_memberships + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering DESC + """, + (max_stream_ordering_sliding_sync_membership_snapshots_table,), + ) + + membership_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_membership_snapshots` table so there is nothing to clean up. + # This is the expected normal scenario for people who have not downgraded their + # Synapse version. + if not membership_rows: + return + + for chunk in batch_iter(membership_rows, 1000): + # Handle updating the `sliding_sync_membership_snapshots` table + # + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_membership_snapshots", + keys=("user_id", "room_id"), + values=chunk, + ) + def _apply_module_schemas( txn: Cursor, database_engine: BaseDatabaseEngine, config: HomeServerConfig From a94c1dd62c73bd878a539e04ce132c8f178fe4c9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 16:27:20 -0500 Subject: [PATCH 108/142] Add more context for why --- synapse/storage/prepare_database.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 2b323c0272d..83993cb9695 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -589,8 +589,13 @@ def _clear_out_of_date_sliding_sync_tables( `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date data in the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + again. This will ensure that we don't have any out-of-date/stale data in the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables since any new + events sent in rooms would have also needed to be written to the sliding sync + tables. For example a new event needs to bump `event_stream_ordering` in + `sliding_sync_joined_rooms` table or some state in the room changing (like the room + name). Or another example of someone's membership changing in a room affecting + `sliding_sync_membership_snapshots`. FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for @@ -609,8 +614,11 @@ def _clear_out_of_date_sliding_sync_joined_rooms_table( Clears out-of-date entries from the `sliding_sync_joined_rooms` table. This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date data in the - `sliding_sync_joined_rooms` table. + again. 
This will ensure that we don't have any out-of-date/stale data in the + `sliding_sync_joined_rooms` table since any new events sent in rooms would have also + needed to be written to the `sliding_sync_joined_rooms` table or some state in the + room changing (like the room name). For example a new event needs to bump + `event_stream_ordering` in `sliding_sync_joined_rooms`. FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for @@ -672,8 +680,10 @@ def _clear_out_of_date_sliding_sync_membership_snapshots_table( Clears out-of-date entries from the `sliding_sync_membership_snapshots` table. This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date data in the - `sliding_sync_membership_snapshots` table. + again. This will ensure that we don't have any out-of-date/stale data in the + `sliding_sync_membership_snapshots` table since any new membership changes in rooms + would have also needed to be written to the `sliding_sync_membership_snapshots` + table. FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for From 6a44686dc3002a691ca1a6bdcc1fc1a03bf0c085 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 16:32:59 -0500 Subject: [PATCH 109/142] Why it matters --- synapse/storage/prepare_database.py | 48 ++++++++++------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 83993cb9695..01559da03f2 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -576,20 +576,20 @@ def _upgrade_existing_database( # foreground update for # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by # https://github.com/element-hq/synapse/issues/TODO) - _clear_out_of_date_sliding_sync_tables( + _clear_stale_data_in_sliding_sync_tables( txn=cur, ) -def _clear_out_of_date_sliding_sync_tables( +def _clear_stale_data_in_sliding_sync_tables( txn: LoggingTransaction, ) -> None: """ - Clears out-of-date entries from the + Clears stale/out-of-date entries from the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date/stale data in the + again. This will ensure that we don't have any stale/out-of-date data in the `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables since any new events sent in rooms would have also needed to be written to the sliding sync tables. For example a new event needs to bump `event_stream_ordering` in @@ -597,33 +597,28 @@ def _clear_out_of_date_sliding_sync_tables( name). Or another example of someone's membership changing in a room affecting `sliding_sync_membership_snapshots`. + This way, if a row exists in the sliding sync tables, we are able to rely on it + (accurate data). So if a row doesn't exist, we use a fallback to get the same info + until the background updates fill in the rows or a new event comes in triggering it + to be fully inserted. 
+ FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by https://github.com/element-hq/synapse/issues/TODO) """ - _clear_out_of_date_sliding_sync_joined_rooms_table(txn) - _clear_out_of_date_sliding_sync_membership_snapshots_table(txn) + _clear_stale_data_in_sliding_sync_joined_rooms_table(txn) + _clear_stale_data_in_sliding_sync_membership_snapshots_table(txn) -def _clear_out_of_date_sliding_sync_joined_rooms_table( +def _clear_stale_data_in_sliding_sync_joined_rooms_table( txn: LoggingTransaction, ) -> None: """ - Clears out-of-date entries from the `sliding_sync_joined_rooms` table. - - This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date/stale data in the - `sliding_sync_joined_rooms` table since any new events sent in rooms would have also - needed to be written to the `sliding_sync_joined_rooms` table or some state in the - room changing (like the room name). For example a new event needs to bump - `event_stream_ordering` in `sliding_sync_joined_rooms`. + Clears stale/out-of-date entries from the `sliding_sync_joined_rooms` table. - FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the - foreground update for - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - https://github.com/element-hq/synapse/issues/TODO) + See `_clear_out_of_date_sliding_sync_tables()` description above for more context. """ # Find the point when we stopped writing to the `sliding_sync_joined_rooms` table @@ -673,22 +668,13 @@ def _clear_out_of_date_sliding_sync_joined_rooms_table( ) -def _clear_out_of_date_sliding_sync_membership_snapshots_table( +def _clear_stale_data_in_sliding_sync_membership_snapshots_table( txn: LoggingTransaction, ) -> None: """ - Clears out-of-date entries from the `sliding_sync_membership_snapshots` table. + Clears stale/out-of-date entries from the `sliding_sync_membership_snapshots` table. - This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any out-of-date/stale data in the - `sliding_sync_membership_snapshots` table since any new membership changes in rooms - would have also needed to be written to the `sliding_sync_membership_snapshots` - table. - - FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the - foreground update for - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - https://github.com/element-hq/synapse/issues/TODO) + See `_clear_out_of_date_sliding_sync_tables()` description above for more context. 
""" # Find the point when we stopped writing to the `sliding_sync_membership_snapshots` table From eb3c84cf45e67c25116de55deeb9a78275dd8e6c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 17:31:26 -0500 Subject: [PATCH 110/142] Kick-off background update for out-of-date snapshots --- synapse/storage/database.py | 4 +- .../databases/main/events_bg_updates.py | 5 ++ synapse/storage/prepare_database.py | 47 ++++++++++++++----- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index ba2616b4795..da50fd7f837 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1323,7 +1323,7 @@ def simple_upsert_txn_emulated( if lock: # We need to lock the table :( - self.engine.lock_table(txn, table) + txn.database_engine.lock_table(txn, table) def _getwhere(key: str) -> str: # If the value we're passing in is None (aka NULL), we need to use @@ -1377,8 +1377,8 @@ def _getwhere(key: str) -> str: # successfully inserted return True + @staticmethod def simple_upsert_txn_native_upsert( - self, txn: LoggingTransaction, table: str, keyvalues: Mapping[str, Any], diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index af0a1ba8805..1287b995fb2 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1552,6 +1552,11 @@ async def _sliding_sync_joined_rooms_bg_update( def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: # Fetch the set of room IDs that we want to update + # + # We use `current_state_events` table as the barometer for whether the + # server is still participating in the room because if we're + # `no_longer_in_room`, this table would be cleared out for the given + # `room_id`. txn.execute( """ SELECT DISTINCT room_id FROM current_state_events diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 01559da03f2..b127e83e612 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -42,6 +42,7 @@ LoggingDatabaseConnection, LoggingTransaction, ) +from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor @@ -576,12 +577,12 @@ def _upgrade_existing_database( # foreground update for # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by # https://github.com/element-hq/synapse/issues/TODO) - _clear_stale_data_in_sliding_sync_tables( + _resolve_stale_data_in_sliding_sync_tables( txn=cur, ) -def _clear_stale_data_in_sliding_sync_tables( +def _resolve_stale_data_in_sliding_sync_tables( txn: LoggingTransaction, ) -> None: """ @@ -598,7 +599,7 @@ def _clear_stale_data_in_sliding_sync_tables( `sliding_sync_membership_snapshots`. This way, if a row exists in the sliding sync tables, we are able to rely on it - (accurate data). So if a row doesn't exist, we use a fallback to get the same info + (accurate data). And if a row doesn't exist, we use a fallback to get the same info until the background updates fill in the rows or a new event comes in triggering it to be fully inserted. 
@@ -608,17 +609,20 @@ def _clear_stale_data_in_sliding_sync_tables( https://github.com/element-hq/synapse/issues/TODO) """ - _clear_stale_data_in_sliding_sync_joined_rooms_table(txn) - _clear_stale_data_in_sliding_sync_membership_snapshots_table(txn) + _resolve_stale_data_in_sliding_sync_joined_rooms_table(txn) + _resolve_stale_data_in_sliding_sync_membership_snapshots_table(txn) -def _clear_stale_data_in_sliding_sync_joined_rooms_table( +def _resolve_stale_data_in_sliding_sync_joined_rooms_table( txn: LoggingTransaction, ) -> None: """ - Clears stale/out-of-date entries from the `sliding_sync_joined_rooms` table. + Clears stale/out-of-date entries from the `sliding_sync_joined_rooms` table and + kicks-off the background update to catch-up with what we missed while Synapse was + downgraded. - See `_clear_out_of_date_sliding_sync_tables()` description above for more context. + See `_resolve_stale_data_in_sliding_sync_tables()` description above for more + context. """ # Find the point when we stopped writing to the `sliding_sync_joined_rooms` table @@ -668,13 +672,16 @@ def _clear_stale_data_in_sliding_sync_joined_rooms_table( ) -def _clear_stale_data_in_sliding_sync_membership_snapshots_table( +def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( txn: LoggingTransaction, ) -> None: """ - Clears stale/out-of-date entries from the `sliding_sync_membership_snapshots` table. + Clears stale/out-of-date entries from the `sliding_sync_membership_snapshots` table + and kicks-off the background update to catch-up with what we missed while Synapse + was downgraded. - See `_clear_out_of_date_sliding_sync_tables()` description above for more context. + See `_resolve_stale_data_in_sliding_sync_tables()` description above for more + context. """ # Find the point when we stopped writing to the `sliding_sync_membership_snapshots` table @@ -698,7 +705,7 @@ def _clear_stale_data_in_sliding_sync_membership_snapshots_table( txn.execute( """ SELECT DISTINCT(user_id, room_id) - FROM room_memberships + FROM local_current_membership WHERE event_stream_ordering > ? ORDER BY event_stream_ordering DESC """, @@ -723,6 +730,22 @@ def _clear_stale_data_in_sliding_sync_membership_snapshots_table( values=chunk, ) + # Now kick-off the background update to catch-up with what we missed while Synapse + # was downgraded. + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, we will + # eventually fill in the rows we're trying to populate. 
+ insertion_values={ + "progress_json": f'{ "last_event_stream_ordering": {str(max_stream_ordering_sliding_sync_membership_snapshots_table)} }', + }, + ) + def _apply_module_schemas( txn: Cursor, database_engine: BaseDatabaseEngine, config: HomeServerConfig From 53473a0eb4a839d0f168c88e6d14f145d37fed7d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 18:35:49 -0500 Subject: [PATCH 111/142] Adapt `sliding_sync_joined_rooms` background update to use `event_stream_ordering` for progress This way we can re-use it for the catch-up background process --- .../databases/main/events_bg_updates.py | 118 ++++++++++++++---- synapse/storage/prepare_database.py | 16 +++ 2 files changed, 110 insertions(+), 24 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 1287b995fb2..38a66112bde 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -20,6 +20,7 @@ # import logging +from collections import OrderedDict from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast import attr @@ -112,6 +113,22 @@ class _CalculateChainCover: finished_room_map: Dict[str, Tuple[int, int]] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _JoinedRoomStreamOrderingUpdate: + """ + Intermediate container class used in `SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE` + """ + + # The most recent event stream_ordering for the room + most_recent_event_stream_ordering: int + # The most recent event `bump_stamp` for the room + most_recent_bump_stamp: Optional[int] + # The `stream_ordering` in the `current_state_delta_stream` that we got the state + # values from. We can use this to check if the current state has been updated since + # we last checked. + last_current_state_delta_stream_id: int + + class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseStore): def __init__( self, @@ -1548,28 +1565,49 @@ async def _sliding_sync_joined_rooms_bg_update( """ Background update to populate the `sliding_sync_joined_rooms` table. """ - last_room_id = progress.get("last_room_id", "") + last_event_stream_ordering = progress.get( + "last_event_stream_ordering", -(1 << 31) + ) - def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: + def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: + """ + Returns: + A list of room ID's to update along with the progress value + (event_stream_ordering) indicating the continuation point in the + `current_state_events` table for the next batch. + """ # Fetch the set of room IDs that we want to update # # We use `current_state_events` table as the barometer for whether the # server is still participating in the room because if we're # `no_longer_in_room`, this table would be cleared out for the given # `room_id`. + # + # Because we're using `event_stream_ordering` as the progress marker, we're + # going to be pulling out the same rooms over and over again but we can + # at-least re-use this background update for the catch-up background + # process as well (see `_resolve_stale_data_in_sliding_sync_tables()`). + # + # It's important to sort by `event_stream_ordering` *ascending* (oldest to + # newest) so that if we see that this background update in progress and want + # to start the catch-up process, we can safely assume that it will + # eventually get to the rooms we want to catch-up on anyway (see + # `_resolve_stale_data_in_sliding_sync_tables()`). 
txn.execute( """ - SELECT DISTINCT room_id FROM current_state_events - WHERE room_id > ? - ORDER BY room_id ASC + SELECT room_id, max(event_stream_ordering) + FROM current_state_events + WHERE event_stream_ordering > ? + GROUP BY room_id + ORDER BY event_stream_ordering ASC LIMIT ? """, - (last_room_id, batch_size), + (last_event_stream_ordering, batch_size), ) - rooms_to_update_rows = cast(List[Tuple[str]], txn.fetchall()) + rooms_to_update_rows = cast(List[Tuple[str, int]], txn.fetchall()) - return [row[0] for row in rooms_to_update_rows] + return rooms_to_update_rows rooms_to_update = await self.db_pool.runInteraction( "_sliding_sync_joined_rooms_bg_update._get_rooms_to_update_txn", @@ -1582,13 +1620,21 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: ) return 0 - # Map from room_id to insert/update state values in the `sliding_sync_joined_rooms` table + # Map from room_id to insert/update state values in the `sliding_sync_joined_rooms` table. joined_room_updates: Dict[str, SlidingSyncStateInsertValues] = {} # Map from room_id to stream_ordering/bump_stamp/last_current_state_delta_stream_id values joined_room_stream_ordering_updates: Dict[ - str, Tuple[int, Optional[int], int] + str, _JoinedRoomStreamOrderingUpdate ] = {} - for room_id in rooms_to_update: + # Map from room_id to the progress value (event_stream_ordering) + # + # This needs to be an `OrderedDict` because we need to process things in + # `event_stream_ordering` order *ascending* to save our progress position + # correctly if we need to exit early. + room_id_to_progress_marker_map: OrderedDict[str, int] = OrderedDict() + for room_id, progress_event_stream_ordering in rooms_to_update: + room_id_to_progress_marker_map[room_id] = progress_event_stream_ordering + current_state_ids_map, last_current_state_delta_stream_id = ( await self.db_pool.runInteraction( "_sliding_sync_joined_rooms_bg_update._get_relevant_sliding_sync_current_state_event_ids_txn", @@ -1645,21 +1691,36 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[str]: most_recent_bump_stamp = bump_stamp_event_pos_results[1].stream joined_room_stream_ordering_updates[room_id] = ( - most_recent_event_stream_ordering, - most_recent_bump_stamp, - last_current_state_delta_stream_id, + _JoinedRoomStreamOrderingUpdate( + most_recent_event_stream_ordering=most_recent_event_stream_ordering, + most_recent_bump_stamp=most_recent_bump_stamp, + last_current_state_delta_stream_id=last_current_state_delta_stream_id, + ) ) def _fill_table_txn(txn: LoggingTransaction) -> None: # Handle updating the `sliding_sync_joined_rooms` table # last_successful_room_id: Optional[str] = None - for room_id, insert_map in joined_room_updates.items(): - ( - event_stream_ordering, - bump_stamp, - last_current_state_delta_stream_id, - ) = joined_room_stream_ordering_updates[room_id] + # Process the rooms in `event_stream_ordering` order *ascending* so we can + # save our position correctly if we need to exit early. + # `progress_event_stream_ordering` is an `OrderedDict` which remembers + # insertion order (and we inserted in the correct order) so this should be + # the correct thing to do. 
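The early-exit bookkeeping mentioned above can be illustrated with a small self-contained sketch (illustrative names only, not Synapse's actual helpers): because the rows were fetched in ascending `event_stream_ordering` order and an `OrderedDict` preserves insertion order, persisting the marker of the last fully processed room is always safe, and an early exit just means the remaining, higher-ordered rooms are revisited on the next run.

    from collections import OrderedDict
    from typing import List, Tuple

    def process_rooms(rows: List[Tuple[str, int]]) -> int:
        """`rows` is (room_id, max_event_stream_ordering), already sorted ascending."""
        progress_map: "OrderedDict[str, int]" = OrderedDict(rows)
        last_saved_marker = -(1 << 31)
        for room_id, marker in progress_map.items():
            if not try_process_room(room_id):
                # Early exit: everything strictly before this room is done, so
                # resuming from `last_saved_marker` re-visits this room first.
                return last_saved_marker
            last_saved_marker = marker
        return last_saved_marker

    def try_process_room(room_id: str) -> bool:
        # Stand-in for the real per-room work; return False to simulate bailing
        # out (for example because the current state changed underneath us).
        return True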
+ for ( + room_id, + progress_event_stream_ordering, + ) in room_id_to_progress_marker_map.items(): + update_map = joined_room_updates[room_id] + + joined_room_update = joined_room_stream_ordering_updates[room_id] + event_stream_ordering = ( + joined_room_update.most_recent_event_stream_ordering + ) + bump_stamp = joined_room_update.most_recent_bump_stamp + last_current_state_delta_stream_id = ( + joined_room_update.last_current_state_delta_stream_id + ) # Check if the current state has been updated since we gathered it state_deltas_since_we_gathered_current_state = ( @@ -1673,7 +1734,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: ) ) for state_delta in state_deltas_since_we_gathered_current_state: - # We only need to check if the state is relevant to the + # We only need to check for the state is relevant to the # `sliding_sync_joined_rooms` table. if ( state_delta.event_type, @@ -1684,7 +1745,9 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: self.db_pool.updates._background_update_progress_txn( txn, _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - {"last_room_id": room_id}, + { + "last_event_stream_ordering": progress_event_stream_ordering + }, ) # Raising exception so we can just exit and try again. It would # be hard to resolve this within the transaction because we need @@ -1706,7 +1769,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: txn, table="sliding_sync_joined_rooms", keyvalues={"room_id": room_id}, - values=insert_map, + values=update_map, insertion_values={ # The reason we're only *inserting* (not *updating*) `event_stream_ordering` # and `bump_stamp` is because if they are present, that means they are already @@ -1724,9 +1787,10 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: ) # Update the progress + _ = room_id_to_progress_marker_map.values() await self.db_pool.updates._background_update_progress( _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - {"last_room_id": rooms_to_update[-1]}, + {"last_event_stream_ordering": rooms_to_update[-1][1]}, ) return len(rooms_to_update) @@ -1745,6 +1809,12 @@ def _find_memberships_to_update_txn( txn: LoggingTransaction, ) -> List[Tuple[str, str, str, str, str, int, bool]]: # Fetch the set of event IDs that we want to update + # + # It's important to sort by `event_stream_ordering` *ascending* (oldest to + # newest) so that if we see that this background update in progress and want + # to start the catch-up process, we can safely assume that it will + # eventually get to the rooms we want to catch-up on anyway (see + # `_resolve_stale_data_in_sliding_sync_tables()`). txn.execute( """ SELECT diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b127e83e612..13dcf9803d3 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -671,6 +671,22 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( values=chunk, ) + # Now kick-off the background update to catch-up with what we missed while Synapse + # was downgraded. + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, we will + # eventually fill in the rows we're trying to populate. 
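The values-versus-insertion-values split described above boils down to a plain SQL upsert in which some columns are refreshed on conflict while others are written only when the row is first created. A minimal standalone sketch of that shape, assuming a simplified `sliding_sync_joined_rooms` layout keyed uniquely by `room_id` (Synapse's `simple_upsert_txn` emits a statement along these lines):

    import sqlite3
    from typing import Optional

    def upsert_joined_room(
        conn: sqlite3.Connection,
        room_id: str,
        room_name: Optional[str],
        event_stream_ordering: int,
        bump_stamp: Optional[int],
    ) -> None:
        conn.execute(
            """
            INSERT INTO sliding_sync_joined_rooms
                (room_id, room_name, event_stream_ordering, bump_stamp)
            VALUES (?, ?, ?, ?)
            ON CONFLICT (room_id) DO UPDATE SET
                -- State-derived columns are safe to refresh from the background
                -- update (subject to the staleness check described above).
                room_name = excluded.room_name
                -- Deliberately *not* touching event_stream_ordering/bump_stamp:
                -- an existing row was written on the event-persistence path and
                -- is already at least as new as what this update computed.
            """,
            (room_id, room_name, event_stream_ordering, bump_stamp),
        )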
+ insertion_values={ + "progress_json": f'{ "last_event_stream_ordering": {str(max_stream_ordering_sliding_sync_joined_rooms_table)} }', + }, + ) + def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( txn: LoggingTransaction, From 7fe5d31e201d0e89b898608f20cc8e98e0d31e4f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 18:52:10 -0500 Subject: [PATCH 112/142] Note down caveat about `forgotten` --- synapse/storage/prepare_database.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 13dcf9803d3..9d712d41182 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -718,6 +718,12 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( max_stream_ordering_sliding_sync_membership_snapshots_table = row[0] + # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is + # set out-of-band, there is no way to tell whether it was set while Synapse was + # downgraded. The only thing the user can do is `/forget` again if they run into + # this. + # + # This only picks up changes to memberships. txn.execute( """ SELECT DISTINCT(user_id, room_id) From 7a0c2810286463e9e55cbe463c4bea39d55e3fd4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 26 Aug 2024 19:43:52 -0500 Subject: [PATCH 113/142] Add placeholder tests --- tests/storage/test_sliding_sync_tables.py | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index fb2340b446b..012176784cf 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -4172,3 +4172,27 @@ def test_membership_snapshots_background_update_forgotten_partial(self) -> None: sliding_sync_membership_snapshots_results.get((room_id, user2_id)), user2_snapshot, ) + + +class SlidingSyncTablesCatchUpBackgroundUpdatesTestCase(SlidingSyncTablesTestCaseBase): + """ + Test the background updates for catch-up after Synapse downgrade populate the `sliding_sync_joined_rooms` and + `sliding_sync_membership_snapshots` tables. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/TODO) + """ + + def test_joined_background_update_catch_up(self) -> None: + """ + TODO + """ + pass + + def test_membership_snapshots_background_update_catch_up(self) -> None: + """ + TODO + """ + pass From 9764f626ea208be584424bf79b777e85f4b2ca92 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 12:09:00 -0500 Subject: [PATCH 114/142] Fix query in Postgres --- synapse/storage/databases/main/events_bg_updates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 38a66112bde..52b4450bbcb 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1595,11 +1595,11 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: # `_resolve_stale_data_in_sliding_sync_tables()`). txn.execute( """ - SELECT room_id, max(event_stream_ordering) + SELECT room_id, MAX(event_stream_ordering) FROM current_state_events WHERE event_stream_ordering > ? 
GROUP BY room_id - ORDER BY event_stream_ordering ASC + ORDER BY MAX(event_stream_ordering) ASC LIMIT ? """, (last_event_stream_ordering, batch_size), From c51a309da59e6ac50467ffcdce70ad7fc21c58f9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 17:11:51 -0500 Subject: [PATCH 115/142] Maybe: always start background update --- synapse/storage/prepare_database.py | 96 +++++++++++++++++------------ 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 9d712d41182..16b4dea5234 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -24,6 +24,7 @@ import re from collections import Counter from typing import ( + cast, Collection, Counter as CounterType, Generator, @@ -36,6 +37,7 @@ import attr +from synapse.util import Clock, json_encoder from synapse.config.homeserver import HomeServerConfig from synapse.storage.database import ( DatabasePool, @@ -683,7 +685,13 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # Only insert the row if it doesn't already exist. If it already exists, we will # eventually fill in the rows we're trying to populate. insertion_values={ - "progress_json": f'{ "last_event_stream_ordering": {str(max_stream_ordering_sliding_sync_joined_rooms_table)} }', + "progress_json": json_encoder.encode( + { + "last_event_stream_ordering": { + str(max_stream_ordering_sliding_sync_joined_rooms_table) + } + } + ), }, ) @@ -710,50 +718,58 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( """, ) - row = txn.fetchone() - # We have nothing written to the `sliding_sync_membership_snapshots` table so there is - # nothing to clean up - if row is None: - return - - max_stream_ordering_sliding_sync_membership_snapshots_table = row[0] - - # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is - # set out-of-band, there is no way to tell whether it was set while Synapse was - # downgraded. The only thing the user can do is `/forget` again if they run into - # this. - # - # This only picks up changes to memberships. - txn.execute( - """ - SELECT DISTINCT(user_id, room_id) - FROM local_current_membership - WHERE event_stream_ordering > ? - ORDER BY event_stream_ordering DESC - """, - (max_stream_ordering_sliding_sync_membership_snapshots_table,), - ) - - membership_rows = txn.fetchall() - # No new events have been written to the `events` table since the last time we wrote - # to the `sliding_sync_membership_snapshots` table so there is nothing to clean up. - # This is the expected normal scenario for people who have not downgraded their - # Synapse version. - if not membership_rows: - return + # If we have nothing written to the `sliding_sync_membership_snapshots` table, + # there is nothing to clean up + row = cast(Tuple[int], txn.fetchone()) + max_stream_ordering_sliding_sync_membership_snapshots_table = None + if row is not None: + max_stream_ordering_sliding_sync_membership_snapshots_table = row[0] - for chunk in batch_iter(membership_rows, 1000): - # Handle updating the `sliding_sync_membership_snapshots` table + # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is + # set out-of-band, there is no way to tell whether it was set while Synapse was + # downgraded. The only thing the user can do is `/forget` again if they run into + # this. 
# - DatabasePool.simple_delete_many_batch_txn( - txn, - table="sliding_sync_membership_snapshots", - keys=("user_id", "room_id"), - values=chunk, + # This only picks up changes to memberships. + txn.execute( + """ + SELECT DISTINCT(user_id, room_id) + FROM local_current_membership + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering DESC + """, + (max_stream_ordering_sliding_sync_membership_snapshots_table,), ) + membership_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_membership_snapshots` table so there is nothing to clean up. + # This is the expected normal scenario for people who have not downgraded their + # Synapse version. + if not membership_rows: + return + + for chunk in batch_iter(membership_rows, 1000): + # Handle updating the `sliding_sync_membership_snapshots` table + # + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_membership_snapshots", + keys=("user_id", "room_id"), + values=chunk, + ) + # Now kick-off the background update to catch-up with what we missed while Synapse # was downgraded. + # + progress_json = {} + if max_stream_ordering_sliding_sync_membership_snapshots_table is not None: + progress_json["last_event_stream_ordering"] = ( + max_stream_ordering_sliding_sync_membership_snapshots_table + ) + + # We still need to kick off the background update to catch-up regardless of whether + # there was anything to clean up. DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", @@ -764,7 +780,7 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( # Only insert the row if it doesn't already exist. If it already exists, we will # eventually fill in the rows we're trying to populate. insertion_values={ - "progress_json": f'{ "last_event_stream_ordering": {str(max_stream_ordering_sliding_sync_membership_snapshots_table)} }', + "progress_json": json_encoder.encode(progress_json), }, ) From 9a7d8c2be44072da69b12a3edee02f04271d94c8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 17:28:07 -0500 Subject: [PATCH 116/142] Start catch-up if nothing written yet --- synapse/storage/prepare_database.py | 97 ++++++++++++++++------------- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 16b4dea5234..2527766e2d1 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -48,6 +48,7 @@ from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor +from synapse.types import JsonDict from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) @@ -637,44 +638,56 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( """, ) - row = txn.fetchone() - # We have nothing written to the `sliding_sync_joined_rooms` table so there is + # If we have nothing written to the `sliding_sync_joined_rooms` table, there is # nothing to clean up - if row is None: - return - - max_stream_ordering_sliding_sync_joined_rooms_table = row[0] + row = cast(Optional[Tuple[int]], txn.fetchone()) + max_stream_ordering_sliding_sync_joined_rooms_table = None + if row is not None: + (max_stream_ordering_sliding_sync_joined_rooms_table,) = row - txn.execute( - """ - SELECT DISTINCT(room_id) - FROM events - WHERE stream_ordering > ? 
- ORDER BY stream_ordering DESC - """, - (max_stream_ordering_sliding_sync_joined_rooms_table,), - ) + txn.execute( + """ + SELECT DISTINCT(room_id) + FROM events + WHERE stream_ordering > ? + ORDER BY stream_ordering DESC + """, + (max_stream_ordering_sliding_sync_joined_rooms_table,), + ) - room_rows = txn.fetchall() - # No new events have been written to the `events` table since the last time we wrote - # to the `sliding_sync_joined_rooms` table so there is nothing to clean up. This is - # the expected normal scenario for people who have not downgraded their Synapse - # version. - if not room_rows: - return + room_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_joined_rooms` table so there is nothing to clean up. This is + # the expected normal scenario for people who have not downgraded their Synapse + # version. + if not room_rows: + return - for chunk in batch_iter(room_rows, 1000): - # Handle updating the `sliding_sync_joined_rooms` table - # - DatabasePool.simple_delete_many_batch_txn( - txn, - table="sliding_sync_joined_rooms", - keys=("room_id",), - values=chunk, - ) + # 1000 is an arbitrary batch size with no testing + for chunk in batch_iter(room_rows, 1000): + # Handle updating the `sliding_sync_joined_rooms` table + # + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_joined_rooms", + keys=("room_id",), + values=chunk, + ) # Now kick-off the background update to catch-up with what we missed while Synapse # was downgraded. + # + # We may need to catch-up on everything if we have nothing written to the + # `sliding_sync_joined_rooms` table yet. This could happen if someone had zero rooms + # on their server (so the normal background update completes), downgrade Synapse + # versions, join and create some new rooms, and upgrade again. + # + progress_json: JsonDict = {} + if max_stream_ordering_sliding_sync_joined_rooms_table is not None: + progress_json["last_event_stream_ordering"] = ( + max_stream_ordering_sliding_sync_joined_rooms_table + ) + DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", @@ -685,13 +698,7 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # Only insert the row if it doesn't already exist. If it already exists, we will # eventually fill in the rows we're trying to populate. 
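The "only insert the row if it doesn't already exist" behaviour above is the usual insert-if-missing idiom. As a rough standalone sketch, with a simplified stand-in for the `background_updates` table and plain `json.dumps` in place of `json_encoder` (the real code goes through `simple_upsert_txn_native_upsert` with empty `values`):

    import json
    import sqlite3
    from typing import Dict, Optional

    def schedule_catch_up(
        conn: sqlite3.Connection,
        update_name: str,
        max_stream_ordering: Optional[int],
    ) -> None:
        # Only include a starting point if something had previously been written;
        # otherwise the update starts from scratch and backfills everything.
        progress: Dict[str, int] = {}
        if max_stream_ordering is not None:
            progress["last_event_stream_ordering"] = max_stream_ordering

        # Insert if missing: an existing row (an update already scheduled or in
        # flight) is left untouched, progress included.
        conn.execute(
            """
            INSERT INTO background_updates (update_name, progress_json)
            VALUES (?, ?)
            ON CONFLICT (update_name) DO NOTHING
            """,
            (update_name, json.dumps(progress)),
        )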
insertion_values={ - "progress_json": json_encoder.encode( - { - "last_event_stream_ordering": { - str(max_stream_ordering_sliding_sync_joined_rooms_table) - } - } - ), + "progress_json": json_encoder.encode(progress_json), }, ) @@ -720,10 +727,10 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( # If we have nothing written to the `sliding_sync_membership_snapshots` table, # there is nothing to clean up - row = cast(Tuple[int], txn.fetchone()) + row = cast(Optional[Tuple[int]], txn.fetchone()) max_stream_ordering_sliding_sync_membership_snapshots_table = None if row is not None: - max_stream_ordering_sliding_sync_membership_snapshots_table = row[0] + (max_stream_ordering_sliding_sync_membership_snapshots_table,) = row # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is # set out-of-band, there is no way to tell whether it was set while Synapse was @@ -749,6 +756,7 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( if not membership_rows: return + # 1000 is an arbitrary batch size with no testing for chunk in batch_iter(membership_rows, 1000): # Handle updating the `sliding_sync_membership_snapshots` table # @@ -762,14 +770,17 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( # Now kick-off the background update to catch-up with what we missed while Synapse # was downgraded. # - progress_json = {} + # We may need to catch-up on everything if we have nothing written to the + # `sliding_sync_membership_snapshots` table yet. This could happen if someone had + # zero rooms on their server (so the normal background update completes), downgrade + # Synapse versions, join and create some new rooms, and upgrade again. + # + progress_json: JsonDict = {} if max_stream_ordering_sliding_sync_membership_snapshots_table is not None: progress_json["last_event_stream_ordering"] = ( max_stream_ordering_sliding_sync_membership_snapshots_table ) - # We still need to kick off the background update to catch-up regardless of whether - # there was anything to clean up. 
DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", From 4dc9e268e68235d12bc3cb2119226660121b6d5d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 18:08:17 -0500 Subject: [PATCH 117/142] Add test for catch-up background update --- synapse/storage/prepare_database.py | 4 +- tests/storage/test_sliding_sync_tables.py | 105 ++++++++++++++++++++-- 2 files changed, 102 insertions(+), 7 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 2527766e2d1..31f99782eea 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -24,7 +24,6 @@ import re from collections import Counter from typing import ( - cast, Collection, Counter as CounterType, Generator, @@ -33,11 +32,11 @@ Optional, TextIO, Tuple, + cast, ) import attr -from synapse.util import Clock, json_encoder from synapse.config.homeserver import HomeServerConfig from synapse.storage.database import ( DatabasePool, @@ -49,6 +48,7 @@ from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor from synapse.types import JsonDict +from synapse.util import json_encoder from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 012176784cf..23a0aee25c4 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -34,6 +34,9 @@ from synapse.server import HomeServer from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates +from synapse.storage.prepare_database import ( + _resolve_stale_data_in_sliding_sync_joined_rooms_table, +) from synapse.util import Clock from tests.test_utils.event_injection import create_event @@ -4176,8 +4179,14 @@ def test_membership_snapshots_background_update_forgotten_partial(self) -> None: class SlidingSyncTablesCatchUpBackgroundUpdatesTestCase(SlidingSyncTablesTestCaseBase): """ - Test the background updates for catch-up after Synapse downgrade populate the `sliding_sync_joined_rooms` and - `sliding_sync_membership_snapshots` tables. + Test the background updates for catch-up after Synapse downgrade to populate the + `sliding_sync_joined_rooms` and `sliding_sync_membership_snapshots` tables. + + This to test the "catch-up" version of the background update vs the "normal" + background update to populate the tables with all of the historical data. Both + versions share the same background update but just serve different purposes. We + check if the "catch-up" version needs to run on start-up based on whether there have + been any changes to rooms that aren't reflected in the sliding sync tables. FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for @@ -4187,12 +4196,98 @@ class SlidingSyncTablesCatchUpBackgroundUpdatesTestCase(SlidingSyncTablesTestCas def test_joined_background_update_catch_up(self) -> None: """ - TODO + Test that new events while Synapse is downgraded (making + `sliding_sync_joined_rooms` stale) will be caught when Synapse is upgraded and + the catch-up routine is run. """ - pass + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. 
Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Get a snapshot of the `sliding_sync_joined_rooms` table before we add some state + sliding_sync_joined_rooms_results_before_state = ( + self._get_sliding_sync_joined_rooms() + ) + self.assertIncludes( + set(sliding_sync_joined_rooms_results_before_state.keys()), + {room_id_with_info}, + exact=True, + ) + + # Add a room name + self.helper.send_state( + room_id_with_info, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate new events + # being sent while Synapse was downgraded. + self.wait_for_background_updates() + + # Clean-up the `sliding_sync_joined_rooms` table as if the the room name + # never made it into the table. This is to simulate the room name event + # being sent while Synapse was downgraded. + self.get_success( + self.store.db_pool.simple_update( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id_with_info}, + updatevalues={ + # Clear the room name + "room_name": None, + # Reset the `event_stream_ordering` back to the value before the room name + "event_stream_ordering": sliding_sync_joined_rooms_results_before_state[ + room_id_with_info + ].event_stream_ordering, + }, + desc="sliding_sync_joined_rooms.test_joined_background_update_catch_up", + ) + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. 
+ self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_joined_rooms_table", + _resolve_stale_data_in_sliding_sync_joined_rooms_table, + ) + ) + + # Ensure that the stale data is deleted from the table + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id_with_info}, + exact=True, + ) def test_membership_snapshots_background_update_catch_up(self) -> None: """ TODO """ - pass From c8e17f7479d8e27144fb6705aa2b6797ff570532 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 18:21:25 -0500 Subject: [PATCH 118/142] Add test when no rooms --- tests/storage/test_sliding_sync_tables.py | 104 ++++++++++++++++++++-- 1 file changed, 97 insertions(+), 7 deletions(-) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 23a0aee25c4..01563d4a626 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -4208,7 +4208,7 @@ def test_joined_background_update_catch_up(self) -> None: # underlying background update to populate these tables is the same as this # catch-up routine, we are going to rely on # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. - room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Get a snapshot of the `sliding_sync_joined_rooms` table before we add some state sliding_sync_joined_rooms_results_before_state = ( @@ -4216,13 +4216,13 @@ def test_joined_background_update_catch_up(self) -> None: ) self.assertIncludes( set(sliding_sync_joined_rooms_results_before_state.keys()), - {room_id_with_info}, + {room_id}, exact=True, ) # Add a room name self.helper.send_state( - room_id_with_info, + room_id, EventTypes.Name, {"name": "my super duper room"}, tok=user1_tok, @@ -4244,16 +4244,16 @@ def test_joined_background_update_catch_up(self) -> None: self.get_success( self.store.db_pool.simple_update( table="sliding_sync_joined_rooms", - keyvalues={"room_id": room_id_with_info}, + keyvalues={"room_id": room_id}, updatevalues={ # Clear the room name "room_name": None, # Reset the `event_stream_ordering` back to the value before the room name "event_stream_ordering": sliding_sync_joined_rooms_results_before_state[ - room_id_with_info + room_id ].event_stream_ordering, }, - desc="sliding_sync_joined_rooms.test_joined_background_update_catch_up", + desc="simulate new events while Synapse was downgraded", ) ) @@ -4283,7 +4283,97 @@ def test_joined_background_update_catch_up(self) -> None: sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() self.assertIncludes( set(sliding_sync_joined_rooms_results.keys()), - {room_id_with_info}, + {room_id}, + exact=True, + ) + + def test_joined_background_update_catch_up_no_rooms(self) -> None: + """ + Test that if you start your homeserver with no rooms on a Synapse version that + supports the sliding sync tables and the historical background update completes + (because no rooms to 
process), then Synapse is downgraded and new rooms are + created/joined; when Synapse is upgraded, the rooms will be processed catch-up + routine is run. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Get a snapshot of the `sliding_sync_joined_rooms` table before we add some state + sliding_sync_joined_rooms_results_before_state = ( + self._get_sliding_sync_joined_rooms() + ) + self.assertIncludes( + set(sliding_sync_joined_rooms_results_before_state.keys()), + {room_id}, + exact=True, + ) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate room being + # created while Synapse was downgraded. + self.wait_for_background_updates() + + # Clean-up the `sliding_sync_joined_rooms` table as if the the room never made + # it into the table. This is to simulate the room being created while Synapse + # was downgraded. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_joined_rooms", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="simulate room being created while Synapse was downgraded", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. 
+ self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_joined_rooms_table", + _resolve_stale_data_in_sliding_sync_joined_rooms_table, + ) + ) + + # We still shouldn't find any data yet + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, exact=True, ) From e5e7269998fe16b4125f122b716956600a5384d5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 18:49:53 -0500 Subject: [PATCH 119/142] Add more tests --- tests/storage/test_sliding_sync_tables.py | 184 +++++++++++++++++++++- 1 file changed, 182 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 01563d4a626..34c2ed70419 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -4194,7 +4194,74 @@ class SlidingSyncTablesCatchUpBackgroundUpdatesTestCase(SlidingSyncTablesTestCas https://github.com/element-hq/synapse/issues/TODO) """ - def test_joined_background_update_catch_up(self) -> None: + def test_joined_background_update_catch_up_new_room(self) -> None: + """ + Test that new rooms while Synapse is downgraded (making + `sliding_sync_joined_rooms` stale) will be caught when Synapse is upgraded and + the catch-up routine is run. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate new events + # being sent while Synapse was downgraded. + self.wait_for_background_updates() + + # Clean-up the `sliding_sync_joined_rooms` table as if the the room never made + # it into the table. This is to simulate the a new room while Synapse was + # downgraded. + self.get_success( + self.store.db_pool.simple_delete( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + desc="simulate new room while Synapse was downgraded", + ) + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. 
+ self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_joined_rooms_table", + _resolve_stale_data_in_sliding_sync_joined_rooms_table, + ) + ) + + # We shouldn't see any new data yet + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + + def test_joined_background_update_catch_up_room_state_change(self) -> None: """ Test that new events while Synapse is downgraded (making `sliding_sync_joined_rooms` stale) will be caught when Synapse is upgraded and @@ -4377,7 +4444,120 @@ def test_joined_background_update_catch_up_no_rooms(self) -> None: exact=True, ) - def test_membership_snapshots_background_update_catch_up(self) -> None: + def test_membership_snapshots_background_update_catch_up_new_membership( + self, + ) -> None: + """ + Test that completely new membership while Synapse is downgraded (making + `sliding_sync_membership_snapshots` stale) will be caught when Synapse is + upgraded and the catch-up routine is run. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # User2 joins the room + self.helper.join(room_id, user2_id, tok=user2_tok) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate new events + # being sent while Synapse was downgraded. + self.wait_for_background_updates() + + # Clean-up the `sliding_sync_membership_snapshots` table as if the user2 + # membership never made it into the table. This is to simulate a membership + # change while Synapse was downgraded. + num_deleted = self.get_success( + self.store.db_pool.simple_delete( + table="sliding_sync_membership_snapshots", + keyvalues={"room_id": room_id, "user_id": user2_id}, + desc="simulate new membership while Synapse was downgraded", + ) + ) + self.assertGreater( + num_deleted, + 0, + f"Expected to delete one row but found none for ({room_id}, {user2_id})", + ) + + # We shouldn't find the user2 membership in the table because we just deleted it + # in preparation for the test. 
+ sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + }, + exact=True, + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. + self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_joined_rooms_table", + _resolve_stale_data_in_sliding_sync_joined_rooms_table, + ) + ) + + # We still shouldn't find any data yet + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + }, + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + + def test_membership_snapshots_background_update_catch_up_membership_change( + self, + ) -> None: + """ + Test that membership changes while Synapse is downgraded (making + `sliding_sync_membership_snapshots` stale) will be caught when Synapse is upgraded and + the catch-up routine is run. + """ + TODO + + def test_membership_snapshots_background_update_catch_up_no_membership( + self, + ) -> None: """ TODO """ + TODO From 85a60c3132a2651463d80f5eec3b2ca3402d6575 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 19:27:24 -0500 Subject: [PATCH 120/142] More tests --- synapse/storage/prepare_database.py | 10 +- tests/storage/test_sliding_sync_tables.py | 156 ++++++++++++++++++++-- 2 files changed, 153 insertions(+), 13 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 31f99782eea..ccdce909088 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -647,10 +647,11 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( txn.execute( """ - SELECT DISTINCT(room_id) + SELECT room_id FROM events WHERE stream_ordering > ? - ORDER BY stream_ordering DESC + GROUP BY room_id + ORDER BY MAX(stream_ordering) ASC """, (max_stream_ordering_sliding_sync_joined_rooms_table,), ) @@ -740,10 +741,10 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( # This only picks up changes to memberships. txn.execute( """ - SELECT DISTINCT(user_id, room_id) + SELECT user_id, room_id FROM local_current_membership WHERE event_stream_ordering > ? 
- ORDER BY event_stream_ordering DESC + ORDER BY event_stream_ordering ASC """, (max_stream_ordering_sliding_sync_membership_snapshots_table,), ) @@ -781,6 +782,7 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( max_stream_ordering_sliding_sync_membership_snapshots_table ) + logger.info("asdf insert catch-up bg update progress_json %s", progress_json) DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 34c2ed70419..e1b5c1325ed 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -36,6 +36,7 @@ from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates from synapse.storage.prepare_database import ( _resolve_stale_data_in_sliding_sync_joined_rooms_table, + _resolve_stale_data_in_sliding_sync_membership_snapshots_table, ) from synapse.util import Clock @@ -4466,6 +4467,19 @@ def test_membership_snapshots_background_update_catch_up_new_membership( # User2 joins the room self.helper.join(room_id, user2_id, tok=user2_tok) + # Both users are joined to the room + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + # Make sure all of the background updates have finished before we start the # catch-up. Even though it should work fine if the other background update is # still running, we want to see the catch-up routine restore the progress @@ -4479,18 +4493,13 @@ def test_membership_snapshots_background_update_catch_up_new_membership( # Clean-up the `sliding_sync_membership_snapshots` table as if the user2 # membership never made it into the table. This is to simulate a membership # change while Synapse was downgraded. - num_deleted = self.get_success( + self.get_success( self.store.db_pool.simple_delete( table="sliding_sync_membership_snapshots", keyvalues={"room_id": room_id, "user_id": user2_id}, desc="simulate new membership while Synapse was downgraded", ) ) - self.assertGreater( - num_deleted, - 0, - f"Expected to delete one row but found none for ({room_id}, {user2_id})", - ) # We shouldn't find the user2 membership in the table because we just deleted it # in preparation for the test. @@ -4509,8 +4518,8 @@ def test_membership_snapshots_background_update_catch_up_new_membership( # background update to catch-up on the missing data. self.get_success( self.store.db_pool.runInteraction( - "_resolve_stale_data_in_sliding_sync_joined_rooms_table", - _resolve_stale_data_in_sliding_sync_joined_rooms_table, + "_resolve_stale_data_in_sliding_sync_membership_snapshots_table", + _resolve_stale_data_in_sliding_sync_membership_snapshots_table, ) ) @@ -4552,7 +4561,136 @@ def test_membership_snapshots_background_update_catch_up_membership_change( `sliding_sync_membership_snapshots` stale) will be caught when Synapse is upgraded and the catch-up routine is run. """ - TODO + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. 
Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # User2 joins the room + self.helper.join(room_id, user2_id, tok=user2_tok) + + # Both users are joined to the room + sliding_sync_membership_snapshots_results_before_membership_changes = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set( + sliding_sync_membership_snapshots_results_before_membership_changes.keys() + ), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + + # User2 leaves the room + self.helper.leave(room_id, user2_id, tok=user2_tok) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate new events + # being sent while Synapse was downgraded. + self.wait_for_background_updates() + + # Rollback the `sliding_sync_membership_snapshots` table as if the user2 + # membership never made it into the table. This is to simulate a membership + # change while Synapse was downgraded. + self.get_success( + self.store.db_pool.simple_update( + table="sliding_sync_membership_snapshots", + keyvalues={"room_id": room_id, "user_id": user2_id}, + updatevalues={ + # Reset everything back to the value before user2 left the room + "membership": sliding_sync_membership_snapshots_results_before_membership_changes[ + (room_id, user2_id) + ].membership, + "membership_event_id": sliding_sync_membership_snapshots_results_before_membership_changes[ + (room_id, user2_id) + ].membership_event_id, + "event_stream_ordering": sliding_sync_membership_snapshots_results_before_membership_changes[ + (room_id, user2_id) + ].event_stream_ordering, + }, + desc="simulate membership change while Synapse was downgraded", + ) + ) + + # We should see user2 still joined to the room because we made that change in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user1_id)), + sliding_sync_membership_snapshots_results_before_membership_changes[ + (room_id, user1_id) + ], + ) + self.assertEqual( + sliding_sync_membership_snapshots_results.get((room_id, user2_id)), + sliding_sync_membership_snapshots_results_before_membership_changes[ + (room_id, user2_id) + ], + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. 
+ self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_membership_snapshots_table", + _resolve_stale_data_in_sliding_sync_membership_snapshots_table, + ) + ) + + # Ensure that the stale data is deleted from the table + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + }, + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) def test_membership_snapshots_background_update_catch_up_no_membership( self, From 56a4c0ba6ea7c8fc0851b2192f8fb6c7fa752953 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 19:34:16 -0500 Subject: [PATCH 121/142] Round out tests --- tests/storage/test_sliding_sync_tables.py | 102 +++++++++++++++++++--- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index e1b5c1325ed..2654decb0c7 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -4373,16 +4373,6 @@ def test_joined_background_update_catch_up_no_rooms(self) -> None: # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. room_id = self.helper.create_room_as(user1_id, tok=user1_tok) - # Get a snapshot of the `sliding_sync_joined_rooms` table before we add some state - sliding_sync_joined_rooms_results_before_state = ( - self._get_sliding_sync_joined_rooms() - ) - self.assertIncludes( - set(sliding_sync_joined_rooms_results_before_state.keys()), - {room_id}, - exact=True, - ) - # Make sure all of the background updates have finished before we start the # catch-up. Even though it should work fine if the other background update is # still running, we want to see the catch-up routine restore the progress @@ -4561,7 +4551,6 @@ def test_membership_snapshots_background_update_catch_up_membership_change( `sliding_sync_membership_snapshots` stale) will be caught when Synapse is upgraded and the catch-up routine is run. """ - user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") user2_id = self.register_user("user2", "pass") @@ -4696,6 +4685,93 @@ def test_membership_snapshots_background_update_catch_up_no_membership( self, ) -> None: """ - TODO + Test that if you start your homeserver with no rooms on a Synapse version that + supports the sliding sync tables and the historical background update completes + (because no rooms/membership to process), then Synapse is downgraded and new + rooms are created/joined; when Synapse is upgraded, the rooms will be processed + catch-up routine is run. """ - TODO + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Instead of testing with various levels of room state that should appear in the + # table, we're only using one room to keep this test simple. 
Because the + # underlying background update to populate these tables is the same as this + # catch-up routine, we are going to rely on + # `SlidingSyncTablesBackgroundUpdatesTestCase` to cover that logic. + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # User2 joins the room + self.helper.join(room_id, user2_id, tok=user2_tok) + + # Make sure all of the background updates have finished before we start the + # catch-up. Even though it should work fine if the other background update is + # still running, we want to see the catch-up routine restore the progress + # correctly. + # + # We also don't want the normal background update messing with our results so we + # run this before we do our manual database clean-up to simulate new events + # being sent while Synapse was downgraded. + self.wait_for_background_updates() + + # Rollback the `sliding_sync_membership_snapshots` table as if the user2 + # membership never made it into the table. This is to simulate a membership + # change while Synapse was downgraded. + self.get_success( + self.store.db_pool.simple_delete_many( + table="sliding_sync_membership_snapshots", + column="room_id", + iterable=(room_id,), + keyvalues={}, + desc="simulate room being created while Synapse was downgraded", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # The function under test. It should clear out stale data and start the + # background update to catch-up on the missing data. + self.get_success( + self.store.db_pool.runInteraction( + "_resolve_stale_data_in_sliding_sync_membership_snapshots_table", + _resolve_stale_data_in_sliding_sync_membership_snapshots_table, + ) + ) + + # We still shouldn't find any data yet + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + set(), + exact=True, + ) + + # Wait for the catch-up background update to finish + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # Ensure that the table is populated correctly after the catch-up background + # update finishes + sliding_sync_membership_snapshots_results = ( + self._get_sliding_sync_membership_snapshots() + ) + self.assertIncludes( + set(sliding_sync_membership_snapshots_results.keys()), + { + (room_id, user1_id), + (room_id, user2_id), + }, + exact=True, + ) From 9d08bc21577ab6fdcd9c948ed02bf71c47138507 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 19:35:05 -0500 Subject: [PATCH 122/142] Remove debug logs --- synapse/storage/prepare_database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index ccdce909088..034e6f6ccd4 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -782,7 +782,6 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( max_stream_ordering_sliding_sync_membership_snapshots_table ) - logger.info("asdf insert catch-up bg update progress_json %s", progress_json) DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", From a507f152c907681431c96e8eb4ff8cce177177e1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 
19:45:50 -0500 Subject: [PATCH 123/142] Use `stream_id` of some point before we fetch the current state This is simpler and some rooms are so old that they don't have `current_state_delta_stream` yet. It's easier if we just get a general max `stream_id` of the whole table than the max `stream_id` for the specific room anyway. Thanks @erikjohnston --- synapse/storage/databases/main/events.py | 21 ++------------ .../databases/main/events_bg_updates.py | 29 ++++++++----------- 2 files changed, 14 insertions(+), 36 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fa41d339205..f8d176d1333 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1849,7 +1849,7 @@ def _update_current_state_txn( @classmethod def _get_relevant_sliding_sync_current_state_event_ids_txn( cls, txn: LoggingTransaction, room_id: str - ) -> Tuple[MutableStateMap[str], int]: + ) -> MutableStateMap[str]: """ Fetch the current state event IDs for the relevant (to the `sliding_sync_joined_rooms` table) state types for the given room. @@ -1888,24 +1888,7 @@ def _get_relevant_sliding_sync_current_state_event_ids_txn( (event_type, state_key): event_id for event_id, event_type, state_key in txn } - txn.execute( - """ - SELECT stream_id - FROM current_state_delta_stream - WHERE - room_id = ? - ORDER BY stream_id DESC - LIMIT 1 - """, - (room_id,), - ) - row = txn.fetchone() - # If we're able to fetch the `current_state_events` above, we should have rows - # in `current_state_delta_stream` as well. - assert row, "Failed to fetch the `last_current_state_delta_stream_id`" - last_current_state_delta_stream_id = row[0] - - return current_state_map, last_current_state_delta_stream_id + return current_state_map @classmethod def _get_sliding_sync_insert_values_from_state_map( diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 52b4450bbcb..38a786c001a 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -123,10 +123,6 @@ class _JoinedRoomStreamOrderingUpdate: most_recent_event_stream_ordering: int # The most recent event `bump_stamp` for the room most_recent_bump_stamp: Optional[int] - # The `stream_ordering` in the `current_state_delta_stream` that we got the state - # values from. We can use this to check if the current state has been updated since - # we last checked. - last_current_state_delta_stream_id: int class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseStore): @@ -1622,7 +1618,7 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: # Map from room_id to insert/update state values in the `sliding_sync_joined_rooms` table. joined_room_updates: Dict[str, SlidingSyncStateInsertValues] = {} - # Map from room_id to stream_ordering/bump_stamp/last_current_state_delta_stream_id values + # Map from room_id to stream_ordering/bump_stamp, etc values joined_room_stream_ordering_updates: Dict[ str, _JoinedRoomStreamOrderingUpdate ] = {} @@ -1632,15 +1628,18 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: # `event_stream_ordering` order *ascending* to save our progress position # correctly if we need to exit early. 
room_id_to_progress_marker_map: OrderedDict[str, int] = OrderedDict() + # As long as we get this value before we fetch the current state, we can use it + # to check if something has changed since that point. + most_recent_current_state_delta_stream_id = ( + await self.get_max_stream_id_in_current_state_deltas() + ) for room_id, progress_event_stream_ordering in rooms_to_update: room_id_to_progress_marker_map[room_id] = progress_event_stream_ordering - current_state_ids_map, last_current_state_delta_stream_id = ( - await self.db_pool.runInteraction( - "_sliding_sync_joined_rooms_bg_update._get_relevant_sliding_sync_current_state_event_ids_txn", - PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn, - room_id, - ) + current_state_ids_map = await self.db_pool.runInteraction( + "_sliding_sync_joined_rooms_bg_update._get_relevant_sliding_sync_current_state_event_ids_txn", + PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn, + room_id, ) # We're iterating over rooms pulled from the current_state_events table # so we should have some current state for each room @@ -1694,7 +1693,6 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: _JoinedRoomStreamOrderingUpdate( most_recent_event_stream_ordering=most_recent_event_stream_ordering, most_recent_bump_stamp=most_recent_bump_stamp, - last_current_state_delta_stream_id=last_current_state_delta_stream_id, ) ) @@ -1718,9 +1716,6 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: joined_room_update.most_recent_event_stream_ordering ) bump_stamp = joined_room_update.most_recent_bump_stamp - last_current_state_delta_stream_id = ( - joined_room_update.last_current_state_delta_stream_id - ) # Check if the current state has been updated since we gathered it state_deltas_since_we_gathered_current_state = ( @@ -1728,7 +1723,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: txn, room_id, from_token=RoomStreamToken( - stream=last_current_state_delta_stream_id + stream=most_recent_current_state_delta_stream_id ), to_token=None, ) @@ -1763,7 +1758,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: # Since we partially update the `sliding_sync_joined_rooms` as new state # is sent, we need to update the state fields `ON CONFLICT`. We just # have to be careful we're not overwriting it with stale data (see - # `last_current_state_delta_stream_id` check above). + # `most_recent_current_state_delta_stream_id` check above). 
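To make the snapshot-then-verify idea above concrete, here is a self-contained sketch of the same pattern against a toy schema. This is plain sqlite3, not the Synapse storage layer; `joined_rooms_summary` and its `state_count` column are illustrative stand-ins, and only the general shape (record one global high-water mark from the delta stream before reading state, then refuse to write if a newer delta for the room has appeared since) mirrors the change in this patch.

import sqlite3

# Toy schema just to exercise the pattern; not the real Synapse tables.
conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE current_state_events (room_id TEXT, type TEXT, state_key TEXT, event_id TEXT);
    CREATE TABLE current_state_delta_stream (room_id TEXT, stream_id INTEGER);
    CREATE TABLE joined_rooms_summary (room_id TEXT PRIMARY KEY, state_count INTEGER);
    """
)

def refresh_joined_room_row(room_id: str) -> bool:
    """Snapshot-then-verify: returns False if the room's state moved under us."""
    # 1. Record a single global high-water mark *before* reading any state.
    (mark,) = conn.execute(
        "SELECT COALESCE(MAX(stream_id), 0) FROM current_state_delta_stream"
    ).fetchone()
    # 2. The (potentially slow) read of current state happens outside the write txn.
    state_rows = conn.execute(
        "SELECT type, state_key, event_id FROM current_state_events WHERE room_id = ?",
        (room_id,),
    ).fetchall()
    # 3. Inside the write transaction, bail out if any newer delta for this room
    #    exists; a later pass will redo the room with fresh state.
    with conn:
        (newer,) = conn.execute(
            "SELECT COUNT(*) FROM current_state_delta_stream"
            " WHERE room_id = ? AND stream_id > ?",
            (room_id, mark),
        ).fetchone()
        if newer:
            return False
        conn.execute(
            "INSERT OR REPLACE INTO joined_rooms_summary (room_id, state_count)"
            " VALUES (?, ?)",
            (room_id, len(state_rows)),
        )
    return True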
# self.db_pool.simple_upsert_txn( txn, From 94e1a5468783d4ba2b94793206deac047fe3c236 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 20:01:55 -0500 Subject: [PATCH 124/142] `get_events(...)` will omit events from unknown room versions Thanks @erikjohnston --- .../databases/main/events_bg_updates.py | 24 +++++++++++++++++++ .../storage/databases/main/events_worker.py | 6 +++++ 2 files changed, 30 insertions(+) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 38a786c001a..566589ed079 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1650,8 +1650,16 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: current_state_map: StateMap[EventBase] = { state_key: fetched_events[event_id] for state_key, event_id in current_state_ids_map.items() + # `get_events(...)` will filter out events for unknown room versions + if event_id in fetched_events } + # Can happen for unknown room versions (old room versions that aren't known + # anymore) since `get_events(...)` will filter out events for unknown room + # versions + if not current_state_map: + continue + state_insert_values = ( PersistEventsStore._get_sliding_sync_insert_values_from_state_map( current_state_map @@ -1929,8 +1937,16 @@ def _find_previous_membership_txn( current_state_map: StateMap[EventBase] = { state_key: fetched_events[event_id] for state_key, event_id in current_state_ids_map.items() + # `get_events(...)` will filter out events for unknown room versions + if event_id in fetched_events } + # Can happen for unknown room versions (old room versions that aren't known + # anymore) since `get_events(...)` will filter out events for unknown room + # versions + if not current_state_map: + continue + state_insert_values = ( PersistEventsStore._get_sliding_sync_insert_values_from_state_map( current_state_map @@ -2006,8 +2022,16 @@ def _find_previous_membership_txn( state_map: StateMap[EventBase] = { state_key: fetched_events[event_id] for state_key, event_id in state_ids_map.items() + # `get_events(...)` will filter out events for unknown room versions + if event_id in fetched_events } + # Can happen for unknown room versions (old room versions that aren't known + # anymore) since `get_events(...)` will filter out events for unknown room + # versions + if not state_map: + continue + state_insert_values = ( PersistEventsStore._get_sliding_sync_insert_values_from_state_map( state_map diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index cf24d845547..6079cc4a52c 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -457,6 +457,8 @@ async def get_event( ) -> Optional[EventBase]: """Get an event from the database by event_id. + Events for unknown room versions will also be filtered out. + Args: event_id: The event_id of the event to fetch @@ -513,6 +515,8 @@ async def get_events( Unknown events will be omitted from the response. + Events for unknown room versions will also be filtered out. + Args: event_ids: The event_ids of the events to fetch @@ -555,6 +559,8 @@ async def get_events_as_list( Unknown events will be omitted from the response. + Events for unknown room versions will also be filtered out. 
+ Args: event_ids: The event_ids of the events to fetch From 53b7309f6c1d882003d34a5555e6c1f12c61c854 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 27 Aug 2024 20:48:56 -0500 Subject: [PATCH 125/142] Add `sliding_sync_joined_rooms_to_recalculate` table --- .../databases/main/events_bg_updates.py | 47 +++++++++++++++++++ .../delta/87/01_sliding_sync_memberships.sql | 31 +++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 566589ed079..c520faa9e7a 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -89,6 +89,12 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" + SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = ( + "sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update" + ) + SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = ( + "sliding_sync_index_joined_rooms_to_recalculate_table_bg_update" + ) SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE = "sliding_sync_joined_rooms_bg_update" SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE = ( "sliding_sync_membership_snapshots_bg_update" @@ -307,6 +313,19 @@ def __init__( where_clause="NOT outlier", ) + # Handle background updates for Sliding Sync tables + # + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + self._sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update, + ) + self.db_pool.updates.register_background_index_update( + _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + index_name="sliding_sync_joined_rooms_to_recalculate_room_id_idx", + table="sliding_sync_joined_rooms", + columns=["room_id"], + unique=True, + ) # Add some background updates to populate the sliding sync tables self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, @@ -1555,6 +1574,34 @@ def _populate_txn(txn: LoggingTransaction) -> bool: return batch_size + async def _sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update( + self, _progress: JsonDict, _batch_size: int + ) -> int: + """ + Prefill `sliding_sync_joined_rooms_to_recalculate` table with all rooms we know about already. + """ + + def _txn(txn: LoggingTransaction) -> None: + # We do this as one big bulk insert. This has been tested on a bigger + # homeserver with ~10M rooms and took 11s. There is potential for this to + # starve disk usage while this goes on. + txn.execute( + """ + INSERT INTO sliding_sync_joined_rooms_to_recalculate (room_id) SELECT room_id FROM rooms; + """, + ) + + await self.db_pool.runInteraction( + "_sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update", + _txn, + ) + + # Background update is done. 
+ await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE + ) + return 0 + async def _sliding_sync_joined_rooms_bg_update( self, progress: JsonDict, batch_size: int ) -> int: diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 8d7607c15f0..6ab1897230f 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -11,6 +11,18 @@ -- See the GNU Affero General Public License for more details: -- . +-- This table is a list/queue used to keep track of which rooms need to be inserted into +-- `sliding_sync_joined_rooms`. We do this to avoid reading from `current_state_events` +-- during the background update to populate `sliding_sync_joined_rooms` which works but +-- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many +-- state events there are for each room. +-- +-- This table doesn't have any indexes at this point. We add the indexes in a separate +-- step to avoid the extra calculations during the bulk one-shot prefill insert. +CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms_to_recalculate( + room_id TEXT NOT NULL REFERENCES rooms(room_id) +); + -- A table for storing room meta data (current state relevant to sliding sync) that the -- local server is still participating in (someone local is joined to the room). -- @@ -127,8 +139,23 @@ CREATE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_user_id ON sliding_ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream_ordering ON sliding_sync_membership_snapshots(event_stream_ordering); --- Add some background updates to populate the new tables +-- Add a series of background updates to populate the new `sliding_sync_joined_rooms` table: +-- +-- 1. Add a background update to prefill `sliding_sync_joined_rooms_to_recalculate`. +-- We do a one-shot bulk insert from the `rooms` table to prefill. +-- 2. Add a background update to add indexes to the +-- `sliding_sync_joined_rooms_to_recalculate` table after the one-shot bulk insert. +-- We add the index in a separate step after to avoid the extra calculations during +-- the one-shot bulk insert. +-- 3. 
Add a background update to populate the new `sliding_sync_joined_rooms` table +-- INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (8701, 'sliding_sync_joined_rooms_bg_update', '{}'); + (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}'); +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (8701, 'sliding_sync_index_joined_rooms_to_recalculate_table_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update'); +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_index_joined_rooms_to_calculate_table_bg_update'); + +-- Add a background updates to populate the new `sliding_sync_membership_snapshots` table INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (8701, 'sliding_sync_membership_snapshots_bg_update', '{}'); From 8468401a97ea05380295ec123d151c392014cb43 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 00:42:14 -0500 Subject: [PATCH 126/142] Adapt to using `sliding_sync_joined_rooms_to_recalculate` table --- synapse/storage/database.py | 2 +- .../databases/main/events_bg_updates.py | 135 ++++++++---------- synapse/storage/prepare_database.py | 52 +++++-- .../delta/87/01_sliding_sync_memberships.sql | 2 +- tests/storage/test__base.py | 18 +++ tests/storage/test_sliding_sync_tables.py | 42 +++++- 6 files changed, 164 insertions(+), 87 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index da50fd7f837..d6660391205 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1536,8 +1536,8 @@ def simple_upsert_many_txn_emulated( self.simple_upsert_txn_emulated(txn, table, _keys, _vals, lock=False) + @staticmethod def simple_upsert_many_txn_native_upsert( - self, txn: LoggingTransaction, table: str, key_names: Collection[str], diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index c520faa9e7a..85014719ae2 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -20,7 +20,6 @@ # import logging -from collections import OrderedDict from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast import attr @@ -322,7 +321,7 @@ def __init__( self.db_pool.updates.register_background_index_update( _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, index_name="sliding_sync_joined_rooms_to_recalculate_room_id_idx", - table="sliding_sync_joined_rooms", + table="sliding_sync_joined_rooms_to_recalculate", columns=["room_id"], unique=True, ) @@ -1575,21 +1574,43 @@ def _populate_txn(txn: LoggingTransaction) -> bool: return batch_size async def _sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update( - self, _progress: JsonDict, _batch_size: int + self, progress: JsonDict, _batch_size: int ) -> int: """ Prefill `sliding_sync_joined_rooms_to_recalculate` table with all rooms we know about already. """ + initial_insert = progress.get("initial_insert", False) def _txn(txn: LoggingTransaction) -> None: # We do this as one big bulk insert. This has been tested on a bigger # homeserver with ~10M rooms and took 11s. There is potential for this to # starve disk usage while this goes on. 
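If that I/O spike ever became a problem, one hypothetical mitigation (not what this patch does) would be to keyset-paginate the prefill into short write transactions instead of a single INSERT ... SELECT. A standalone sqlite3-style sketch, assuming the real `rooms` table and the new queue table with a unique constraint on `room_id` are present:

import sqlite3

def prefill_in_batches(conn: sqlite3.Connection, batch_size: int = 10_000) -> None:
    """Queue every known room for recalculation, in keyset-paginated chunks."""
    last_room_id = ""
    while True:
        rows = conn.execute(
            "SELECT room_id FROM rooms WHERE room_id > ? ORDER BY room_id LIMIT ?",
            (last_room_id, batch_size),
        ).fetchall()
        if not rows:
            break
        with conn:  # one short write transaction per chunk
            conn.executemany(
                "INSERT INTO sliding_sync_joined_rooms_to_recalculate (room_id)"
                " VALUES (?) ON CONFLICT (room_id) DO NOTHING",
                rows,
            )
        last_room_id = rows[-1][0]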
- txn.execute( - """ - INSERT INTO sliding_sync_joined_rooms_to_recalculate (room_id) SELECT room_id FROM rooms; - """, - ) + if initial_insert: + txn.execute( + """ + INSERT INTO sliding_sync_joined_rooms_to_recalculate + (room_id) + SELECT room_id FROM rooms; + """, + ) + else: + # We can only upsert once the unique index has been added to the table + # (see + # `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`) + # + # We upsert in case we have to run this multiple times. + # + # The `WHERE TRUE` clause is to avoid "Parsing Ambiguity" + txn.execute( + """ + INSERT INTO sliding_sync_joined_rooms_to_recalculate + (room_id) + SELECT room_id FROM rooms WHERE ? + ON CONFLICT (room_id) + DO NOTHING; + """, + (True,), + ) await self.db_pool.runInteraction( "_sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update", @@ -1608,11 +1629,10 @@ async def _sliding_sync_joined_rooms_bg_update( """ Background update to populate the `sliding_sync_joined_rooms` table. """ - last_event_stream_ordering = progress.get( - "last_event_stream_ordering", -(1 << 31) - ) + # We don't need to fetch any progress state because we just grab the next N + # events in `sliding_sync_joined_rooms_to_recalculate` - def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: + def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str]]: """ Returns: A list of room ID's to update along with the progress value @@ -1625,30 +1645,16 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: # server is still participating in the room because if we're # `no_longer_in_room`, this table would be cleared out for the given # `room_id`. - # - # Because we're using `event_stream_ordering` as the progress marker, we're - # going to be pulling out the same rooms over and over again but we can - # at-least re-use this background update for the catch-up background - # process as well (see `_resolve_stale_data_in_sliding_sync_tables()`). - # - # It's important to sort by `event_stream_ordering` *ascending* (oldest to - # newest) so that if we see that this background update in progress and want - # to start the catch-up process, we can safely assume that it will - # eventually get to the rooms we want to catch-up on anyway (see - # `_resolve_stale_data_in_sliding_sync_tables()`). txn.execute( """ - SELECT room_id, MAX(event_stream_ordering) - FROM current_state_events - WHERE event_stream_ordering > ? - GROUP BY room_id - ORDER BY MAX(event_stream_ordering) ASC + SELECT room_id + FROM sliding_sync_joined_rooms_to_recalculate LIMIT ? """, - (last_event_stream_ordering, batch_size), + (batch_size,), ) - rooms_to_update_rows = cast(List[Tuple[str, int]], txn.fetchall()) + rooms_to_update_rows = cast(List[Tuple[str]], txn.fetchall()) return rooms_to_update_rows @@ -1669,28 +1675,23 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: joined_room_stream_ordering_updates: Dict[ str, _JoinedRoomStreamOrderingUpdate ] = {} - # Map from room_id to the progress value (event_stream_ordering) - # - # This needs to be an `OrderedDict` because we need to process things in - # `event_stream_ordering` order *ascending* to save our progress position - # correctly if we need to exit early. - room_id_to_progress_marker_map: OrderedDict[str, int] = OrderedDict() # As long as we get this value before we fetch the current state, we can use it # to check if something has changed since that point. 
most_recent_current_state_delta_stream_id = ( await self.get_max_stream_id_in_current_state_deltas() ) - for room_id, progress_event_stream_ordering in rooms_to_update: - room_id_to_progress_marker_map[room_id] = progress_event_stream_ordering - + for (room_id,) in rooms_to_update: current_state_ids_map = await self.db_pool.runInteraction( "_sliding_sync_joined_rooms_bg_update._get_relevant_sliding_sync_current_state_event_ids_txn", PersistEventsStore._get_relevant_sliding_sync_current_state_event_ids_txn, room_id, ) - # We're iterating over rooms pulled from the current_state_events table - # so we should have some current state for each room - assert current_state_ids_map + + # If we're not joined to the room a) it doesn't belong in the + # `sliding_sync_joined_rooms` table so we should skip and b) we won't have + # any `current_state_events` for the room. + if not current_state_ids_map: + continue fetched_events = await self.get_events(current_state_ids_map.values()) @@ -1701,9 +1702,9 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: if event_id in fetched_events } - # Can happen for unknown room versions (old room versions that aren't known - # anymore) since `get_events(...)` will filter out events for unknown room - # versions + # Even if we are joined to the room, this can happen for unknown room + # versions (old room versions that aren't known anymore) since + # `get_events(...)` will filter out events for unknown room versions if not current_state_map: continue @@ -1754,23 +1755,17 @@ def _get_rooms_to_update_txn(txn: LoggingTransaction) -> List[Tuple[str, int]]: def _fill_table_txn(txn: LoggingTransaction) -> None: # Handle updating the `sliding_sync_joined_rooms` table # - last_successful_room_id: Optional[str] = None - # Process the rooms in `event_stream_ordering` order *ascending* so we can - # save our position correctly if we need to exit early. - # `progress_event_stream_ordering` is an `OrderedDict` which remembers - # insertion order (and we inserted in the correct order) so this should be - # the correct thing to do. for ( room_id, - progress_event_stream_ordering, - ) in room_id_to_progress_marker_map.items(): - update_map = joined_room_updates[room_id] - - joined_room_update = joined_room_stream_ordering_updates[room_id] + update_map, + ) in joined_room_updates.items(): + joined_room_stream_ordering_update = ( + joined_room_stream_ordering_updates[room_id] + ) event_stream_ordering = ( - joined_room_update.most_recent_event_stream_ordering + joined_room_stream_ordering_update.most_recent_event_stream_ordering ) - bump_stamp = joined_room_update.most_recent_bump_stamp + bump_stamp = joined_room_stream_ordering_update.most_recent_bump_stamp # Check if the current state has been updated since we gathered it state_deltas_since_we_gathered_current_state = ( @@ -1790,15 +1785,6 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: state_delta.event_type, state_delta.state_key, ) in SLIDING_SYNC_RELEVANT_STATE_SET: - # Save our progress before we exit early - if last_successful_room_id is not None: - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - { - "last_event_stream_ordering": progress_event_stream_ordering - }, - ) # Raising exception so we can just exit and try again. It would # be hard to resolve this within the transaction because we need # to get full events out that take redactions into account. 
We @@ -1829,20 +1815,17 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: }, ) - # Keep track of the last successful room_id - last_successful_room_id = room_id + # Now that we've processed the room, we can remove it from the queue + self.db_pool.simple_delete_txn( + txn, + table="sliding_sync_joined_rooms_to_recalculate", + keyvalues={"room_id": room_id}, + ) await self.db_pool.runInteraction( "sliding_sync_joined_rooms_bg_update", _fill_table_txn ) - # Update the progress - _ = room_id_to_progress_marker_map.values() - await self.db_pool.updates._background_update_progress( - _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - {"last_event_stream_ordering": rooms_to_update[-1][1]}, - ) - return len(rooms_to_update) async def _sliding_sync_membership_snapshots_bg_update( diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 034e6f6ccd4..9e9c27e3b18 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -642,6 +642,7 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # nothing to clean up row = cast(Optional[Tuple[int]], txn.fetchone()) max_stream_ordering_sliding_sync_joined_rooms_table = None + depends_on = None if row is not None: (max_stream_ordering_sliding_sync_joined_rooms_table,) = row @@ -668,6 +669,7 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( for chunk in batch_iter(room_rows, 1000): # Handle updating the `sliding_sync_joined_rooms` table # + # Clear out the stale data DatabasePool.simple_delete_many_batch_txn( txn, table="sliding_sync_joined_rooms", @@ -675,6 +677,44 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( values=chunk, ) + # Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms + # that went stale and now need to be recalculated. + # + # FIXME: There is potentially a race where the unique index (added via + # `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`) + # hasn't been added at this point so we won't be able to upsert + DatabasePool.simple_upsert_many_txn_native_upsert( + txn, + table="sliding_sync_joined_rooms_to_recalculate", + key_names=("room_id",), + key_values=chunk, + value_names=(), + # No value columns, therefore make a blank list so that the following + # zip() works correctly. + value_values=[() for x in range(len(chunk))], + ) + else: + # Re-run the `sliding_sync_joined_rooms_to_recalculate` prefill if there is + # nothing in the `sliding_sync_joined_rooms` table + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, + # we're already working on it + insertion_values={ + "progress_json": "{}", + # Since we're going to upsert, we need to make sure the unique index is in place + "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + }, + ) + depends_on = ( + _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE + ) + # Now kick-off the background update to catch-up with what we missed while Synapse # was downgraded. # @@ -682,13 +722,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # `sliding_sync_joined_rooms` table yet. 
This could happen if someone had zero rooms # on their server (so the normal background update completes), downgrade Synapse # versions, join and create some new rooms, and upgrade again. - # - progress_json: JsonDict = {} - if max_stream_ordering_sliding_sync_joined_rooms_table is not None: - progress_json["last_event_stream_ordering"] = ( - max_stream_ordering_sliding_sync_joined_rooms_table - ) - DatabasePool.simple_upsert_txn_native_upsert( txn, table="background_updates", @@ -699,7 +732,10 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # Only insert the row if it doesn't already exist. If it already exists, we will # eventually fill in the rows we're trying to populate. insertion_values={ - "progress_json": json_encoder.encode(progress_json), + # Empty progress is expected since it's not used for this background update. + "progress_json": "{}", + # Wait for the prefill to finish + "depends_on": depends_on, }, ) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 6ab1897230f..11fb2c4d649 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -150,7 +150,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream -- 3. Add a background update to populate the new `sliding_sync_joined_rooms` table -- INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}'); + (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{ "initial_insert": true }'); INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES (8701, 'sliding_sync_index_joined_rooms_to_recalculate_table_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update'); INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 506d981ce6c..49dc973a369 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -112,6 +112,24 @@ def test_upsert_many(self) -> None: {(1, "user1", "hello"), (2, "user2", "bleb")}, ) + self.get_success( + self.storage.db_pool.runInteraction( + "test", + self.storage.db_pool.simple_upsert_many_txn, + self.table_name, + key_names=key_names, + key_values=[[2, "user2"]], + value_names=[], + value_values=[], + ) + ) + + # Check results are what we expect + self.assertEqual( + set(self._dump_table_to_tuple()), + {(1, "user1", "hello"), (2, "user2", "bleb")}, + ) + def test_simple_update_many(self) -> None: """ simple_update_many performs many updates at once. diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 2654decb0c7..300ccd664ed 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -2659,13 +2659,33 @@ def test_joined_background_update_missing(self) -> None: exact=True, ) - # Insert and run the background update. + # Insert and run the background updates. 
+ self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "progress_json": "{}", + "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + }, + ) + ) self.get_success( self.store.db_pool.simple_insert( "background_updates", { "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", + "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, }, ) ) @@ -2807,12 +2827,32 @@ def test_joined_background_update_partial(self) -> None: ) # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "progress_json": "{}", + "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + }, + ) + ) self.get_success( self.store.db_pool.simple_insert( "background_updates", { "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", + "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, }, ) ) From da463fb102709e12caa3abb5ce2ad3d41bf94401 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 00:50:33 -0500 Subject: [PATCH 127/142] Add unique index right away for `sliding_sync_joined_rooms_to_recalculate` This makes it so we can always `upsert` to avoid duplicates otherwise I'm not sure of how to not insert duplicates in certain situations (see FIXME in the diff) which would cause problems down the line for the unique index being added later. 
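The constraint-first requirement is easy to demonstrate in isolation: `ON CONFLICT` upserts must name an existing unique constraint, and duplicates inserted before an index exists would make adding it fail later. A standalone sqlite3 sketch (toy setup, not part of this change):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE sliding_sync_joined_rooms_to_recalculate ("
    " room_id TEXT NOT NULL,"
    " PRIMARY KEY (room_id)"
    ")"
)
# With the constraint in place from the start, re-running the prefill (or the
# stale-data catch-up) is idempotent: the second insert is a no-op, not a duplicate.
for _ in range(2):
    conn.execute(
        "INSERT INTO sliding_sync_joined_rooms_to_recalculate (room_id)"
        " VALUES (?) ON CONFLICT (room_id) DO NOTHING",
        ("!room:example.org",),
    )
assert conn.execute(
    "SELECT COUNT(*) FROM sliding_sync_joined_rooms_to_recalculate"
).fetchone() == (1,)
# Without a matching unique constraint, both SQLite and Postgres reject the
# "ON CONFLICT (room_id)" clause outright, which is why the constraint is part
# of the table definition rather than added by a later background update.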
--- .../databases/main/events_bg_updates.py | 51 +++++-------------- synapse/storage/prepare_database.py | 6 --- .../delta/87/01_sliding_sync_memberships.sql | 18 ++----- tests/storage/test_sliding_sync_tables.py | 24 +-------- 4 files changed, 21 insertions(+), 78 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 85014719ae2..95244a48044 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -91,9 +91,6 @@ class _BackgroundUpdates: SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = ( "sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update" ) - SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = ( - "sliding_sync_index_joined_rooms_to_recalculate_table_bg_update" - ) SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE = "sliding_sync_joined_rooms_bg_update" SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE = ( "sliding_sync_membership_snapshots_bg_update" @@ -318,13 +315,6 @@ def __init__( _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, self._sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update, ) - self.db_pool.updates.register_background_index_update( - _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - index_name="sliding_sync_joined_rooms_to_recalculate_room_id_idx", - table="sliding_sync_joined_rooms_to_recalculate", - columns=["room_id"], - unique=True, - ) # Add some background updates to populate the sliding sync tables self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, @@ -1579,38 +1569,25 @@ async def _sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update( """ Prefill `sliding_sync_joined_rooms_to_recalculate` table with all rooms we know about already. """ - initial_insert = progress.get("initial_insert", False) def _txn(txn: LoggingTransaction) -> None: # We do this as one big bulk insert. This has been tested on a bigger # homeserver with ~10M rooms and took 11s. There is potential for this to # starve disk usage while this goes on. - if initial_insert: - txn.execute( - """ - INSERT INTO sliding_sync_joined_rooms_to_recalculate - (room_id) - SELECT room_id FROM rooms; - """, - ) - else: - # We can only upsert once the unique index has been added to the table - # (see - # `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`) - # - # We upsert in case we have to run this multiple times. - # - # The `WHERE TRUE` clause is to avoid "Parsing Ambiguity" - txn.execute( - """ - INSERT INTO sliding_sync_joined_rooms_to_recalculate - (room_id) - SELECT room_id FROM rooms WHERE ? - ON CONFLICT (room_id) - DO NOTHING; - """, - (True,), - ) + # + # We upsert in case we have to run this multiple times. + # + # The `WHERE TRUE` clause is to avoid "Parsing Ambiguity" + txn.execute( + """ + INSERT INTO sliding_sync_joined_rooms_to_recalculate + (room_id) + SELECT room_id FROM rooms WHERE ? 
+ ON CONFLICT (room_id) + DO NOTHING; + """, + (True,), + ) await self.db_pool.runInteraction( "_sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update", diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 9e9c27e3b18..0c171b380b8 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -679,10 +679,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms # that went stale and now need to be recalculated. - # - # FIXME: There is potentially a race where the unique index (added via - # `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`) - # hasn't been added at this point so we won't be able to upsert DatabasePool.simple_upsert_many_txn_native_upsert( txn, table="sliding_sync_joined_rooms_to_recalculate", @@ -707,8 +703,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( # we're already working on it insertion_values={ "progress_json": "{}", - # Since we're going to upsert, we need to make sure the unique index is in place - "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, }, ) depends_on = ( diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 11fb2c4d649..71539e6bd7d 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -16,11 +16,9 @@ -- during the background update to populate `sliding_sync_joined_rooms` which works but -- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many -- state events there are for each room. --- --- This table doesn't have any indexes at this point. We add the indexes in a separate --- step to avoid the extra calculations during the bulk one-shot prefill insert. CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms_to_recalculate( - room_id TEXT NOT NULL REFERENCES rooms(room_id) + room_id TEXT NOT NULL REFERENCES rooms(room_id), + PRIMARY KEY (room_id) ); -- A table for storing room meta data (current state relevant to sliding sync) that the @@ -143,18 +141,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream -- -- 1. Add a background update to prefill `sliding_sync_joined_rooms_to_recalculate`. -- We do a one-shot bulk insert from the `rooms` table to prefill. --- 2. Add a background update to add indexes to the --- `sliding_sync_joined_rooms_to_recalculate` table after the one-shot bulk insert. --- We add the index in a separate step after to avoid the extra calculations during --- the one-shot bulk insert. --- 3. Add a background update to populate the new `sliding_sync_joined_rooms` table +-- 2. 
Add a background update to populate the new `sliding_sync_joined_rooms` table -- INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{ "initial_insert": true }'); -INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES - (8701, 'sliding_sync_index_joined_rooms_to_recalculate_table_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update'); + (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}'); INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES - (8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_index_joined_rooms_to_calculate_table_bg_update'); + (8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update'); -- Add a background updates to populate the new `sliding_sync_membership_snapshots` table INSERT INTO background_updates (ordering, update_name, progress_json) VALUES diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 300ccd664ed..0770ea5e33e 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -2669,23 +2669,13 @@ def test_joined_background_update_missing(self) -> None: }, ) ) - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - "progress_json": "{}", - "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - }, - ) - ) self.get_success( self.store.db_pool.simple_insert( "background_updates", { "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", - "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, }, ) ) @@ -2836,23 +2826,13 @@ def test_joined_background_update_partial(self) -> None: }, ) ) - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - "progress_json": "{}", - "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - }, - ) - ) self.get_success( self.store.db_pool.simple_insert( "background_updates", { "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, "progress_json": "{}", - "depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, + "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, }, ) ) From 7c9c62051ce010e21304283a4fbcb78d3ad90360 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Aug 2024 11:24:20 +0100 Subject: [PATCH 128/142] Remove all rooms pulled out from the queue --- .../databases/main/events_bg_updates.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 95244a48044..ff99a7b0b60 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1792,12 +1792,18 @@ def _fill_table_txn(txn: LoggingTransaction) 
-> None: }, ) - # Now that we've processed the room, we can remove it from the queue - self.db_pool.simple_delete_txn( - txn, - table="sliding_sync_joined_rooms_to_recalculate", - keyvalues={"room_id": room_id}, - ) + # Now that we've processed all the room, we can remove them from the + # queue. + # + # Note: we need to remove all the rooms from the queue we pulled out + # from the DB, not just the ones we've processed above. Otherwise + # we'll simply keep pulling out the same rooms over and over again. + self.db_pool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_joined_rooms_to_recalculate", + keys=("room_id",), + values=rooms_to_update, + ) await self.db_pool.runInteraction( "sliding_sync_joined_rooms_bg_update", _fill_table_txn From bb905cd02c93a7ef309f40f5dc61c6c4899e75fd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Aug 2024 11:44:56 +0100 Subject: [PATCH 129/142] Only run the sliding sync background updates on the main database --- .../databases/main/events_bg_updates.py | 253 +++++++++++++++++ synapse/storage/prepare_database.py | 261 +----------------- tests/storage/test_sliding_sync_tables.py | 4 +- 3 files changed, 256 insertions(+), 262 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index ff99a7b0b60..3603f466787 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -47,6 +47,8 @@ from synapse.types import JsonDict, RoomStreamToken, StateMap, StrCollection from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.types.state import StateFilter +from synapse.util import json_encoder +from synapse.util.iterutils import batch_iter if TYPE_CHECKING: from synapse.server import HomeServer @@ -325,6 +327,15 @@ def __init__( self._sliding_sync_membership_snapshots_bg_update, ) + # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + # foreground update for + # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + # https://github.com/element-hq/synapse/issues/TODO) + with db_conn.cursor(txn_name="resolve_sliding_sync") as txn: + _resolve_stale_data_in_sliding_sync_tables( + txn=txn, + ) + async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -2147,3 +2158,245 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: ) return len(memberships_to_update_rows) + + +def _resolve_stale_data_in_sliding_sync_tables( + txn: LoggingTransaction, +) -> None: + """ + Clears stale/out-of-date entries from the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. + + This accounts for when someone downgrades their Synapse version and then upgrades it + again. This will ensure that we don't have any stale/out-of-date data in the + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables since any new + events sent in rooms would have also needed to be written to the sliding sync + tables. For example a new event needs to bump `event_stream_ordering` in + `sliding_sync_joined_rooms` table or some state in the room changing (like the room + name). Or another example of someone's membership changing in a room affecting + `sliding_sync_membership_snapshots`. + + This way, if a row exists in the sliding sync tables, we are able to rely on it + (accurate data). 
And if a row doesn't exist, we use a fallback to get the same info + until the background updates fill in the rows or a new event comes in triggering it + to be fully inserted. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/TODO) + """ + + _resolve_stale_data_in_sliding_sync_joined_rooms_table(txn) + _resolve_stale_data_in_sliding_sync_membership_snapshots_table(txn) + + +def _resolve_stale_data_in_sliding_sync_joined_rooms_table( + txn: LoggingTransaction, +) -> None: + """ + Clears stale/out-of-date entries from the `sliding_sync_joined_rooms` table and + kicks-off the background update to catch-up with what we missed while Synapse was + downgraded. + + See `_resolve_stale_data_in_sliding_sync_tables()` description above for more + context. + """ + + # Find the point when we stopped writing to the `sliding_sync_joined_rooms` table + txn.execute( + """ + SELECT event_stream_ordering + FROM sliding_sync_joined_rooms + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ) + + # If we have nothing written to the `sliding_sync_joined_rooms` table, there is + # nothing to clean up + row = cast(Optional[Tuple[int]], txn.fetchone()) + max_stream_ordering_sliding_sync_joined_rooms_table = None + depends_on = None + if row is not None: + (max_stream_ordering_sliding_sync_joined_rooms_table,) = row + + txn.execute( + """ + SELECT room_id + FROM events + WHERE stream_ordering > ? + GROUP BY room_id + ORDER BY MAX(stream_ordering) ASC + """, + (max_stream_ordering_sliding_sync_joined_rooms_table,), + ) + + room_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_joined_rooms` table so there is nothing to clean up. This is + # the expected normal scenario for people who have not downgraded their Synapse + # version. + if not room_rows: + return + + # 1000 is an arbitrary batch size with no testing + for chunk in batch_iter(room_rows, 1000): + # Handle updating the `sliding_sync_joined_rooms` table + # + # Clear out the stale data + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_joined_rooms", + keys=("room_id",), + values=chunk, + ) + + # Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms + # that went stale and now need to be recalculated. + DatabasePool.simple_upsert_many_txn_native_upsert( + txn, + table="sliding_sync_joined_rooms_to_recalculate", + key_names=("room_id",), + key_values=chunk, + value_names=(), + # No value columns, therefore make a blank list so that the following + # zip() works correctly. + value_values=[() for x in range(len(chunk))], + ) + else: + # Re-run the `sliding_sync_joined_rooms_to_recalculate` prefill if there is + # nothing in the `sliding_sync_joined_rooms` table + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, + # we're already working on it + insertion_values={ + "progress_json": "{}", + }, + ) + depends_on = ( + _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE + ) + + # Now kick-off the background update to catch-up with what we missed while Synapse + # was downgraded. 
+ # + # We may need to catch-up on everything if we have nothing written to the + # `sliding_sync_joined_rooms` table yet. This could happen if someone had zero rooms + # on their server (so the normal background update completes), downgrade Synapse + # versions, join and create some new rooms, and upgrade again. + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, we will + # eventually fill in the rows we're trying to populate. + insertion_values={ + # Empty progress is expected since it's not used for this background update. + "progress_json": "{}", + # Wait for the prefill to finish + "depends_on": depends_on, + }, + ) + + +def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( + txn: LoggingTransaction, +) -> None: + """ + Clears stale/out-of-date entries from the `sliding_sync_membership_snapshots` table + and kicks-off the background update to catch-up with what we missed while Synapse + was downgraded. + + See `_resolve_stale_data_in_sliding_sync_tables()` description above for more + context. + """ + + # Find the point when we stopped writing to the `sliding_sync_membership_snapshots` table + txn.execute( + """ + SELECT event_stream_ordering + FROM sliding_sync_membership_snapshots + ORDER BY event_stream_ordering DESC + LIMIT 1 + """, + ) + + # If we have nothing written to the `sliding_sync_membership_snapshots` table, + # there is nothing to clean up + row = cast(Optional[Tuple[int]], txn.fetchone()) + max_stream_ordering_sliding_sync_membership_snapshots_table = None + if row is not None: + (max_stream_ordering_sliding_sync_membership_snapshots_table,) = row + + # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is + # set out-of-band, there is no way to tell whether it was set while Synapse was + # downgraded. The only thing the user can do is `/forget` again if they run into + # this. + # + # This only picks up changes to memberships. + txn.execute( + """ + SELECT user_id, room_id + FROM local_current_membership + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering ASC + """, + (max_stream_ordering_sliding_sync_membership_snapshots_table,), + ) + + membership_rows = txn.fetchall() + # No new events have been written to the `events` table since the last time we wrote + # to the `sliding_sync_membership_snapshots` table so there is nothing to clean up. + # This is the expected normal scenario for people who have not downgraded their + # Synapse version. + if not membership_rows: + return + + # 1000 is an arbitrary batch size with no testing + for chunk in batch_iter(membership_rows, 1000): + # Handle updating the `sliding_sync_membership_snapshots` table + # + DatabasePool.simple_delete_many_batch_txn( + txn, + table="sliding_sync_membership_snapshots", + keys=("user_id", "room_id"), + values=chunk, + ) + + # Now kick-off the background update to catch-up with what we missed while Synapse + # was downgraded. + # + # We may need to catch-up on everything if we have nothing written to the + # `sliding_sync_membership_snapshots` table yet. This could happen if someone had + # zero rooms on their server (so the normal background update completes), downgrade + # Synapse versions, join and create some new rooms, and upgrade again. 
+ # + progress_json: JsonDict = {} + if max_stream_ordering_sliding_sync_membership_snapshots_table is not None: + progress_json["last_event_stream_ordering"] = ( + max_stream_ordering_sliding_sync_membership_snapshots_table + ) + + DatabasePool.simple_upsert_txn_native_upsert( + txn, + table="background_updates", + keyvalues={ + "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE + }, + values={}, + # Only insert the row if it doesn't already exist. If it already exists, we will + # eventually fill in the rows we're trying to populate. + insertion_values={ + "progress_json": json_encoder.encode(progress_json), + }, + ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 0c171b380b8..aaffe5ecc9e 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -32,24 +32,15 @@ Optional, TextIO, Tuple, - cast, ) import attr from synapse.config.homeserver import HomeServerConfig -from synapse.storage.database import ( - DatabasePool, - LoggingDatabaseConnection, - LoggingTransaction, -) -from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates +from synapse.storage.database import LoggingDatabaseConnection, LoggingTransaction from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor -from synapse.types import JsonDict -from synapse.util import json_encoder -from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) @@ -576,256 +567,6 @@ def _upgrade_existing_database( logger.info("Schema now up to date") - # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the - # foreground update for - # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - # https://github.com/element-hq/synapse/issues/TODO) - _resolve_stale_data_in_sliding_sync_tables( - txn=cur, - ) - - -def _resolve_stale_data_in_sliding_sync_tables( - txn: LoggingTransaction, -) -> None: - """ - Clears stale/out-of-date entries from the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables. - - This accounts for when someone downgrades their Synapse version and then upgrades it - again. This will ensure that we don't have any stale/out-of-date data in the - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` tables since any new - events sent in rooms would have also needed to be written to the sliding sync - tables. For example a new event needs to bump `event_stream_ordering` in - `sliding_sync_joined_rooms` table or some state in the room changing (like the room - name). Or another example of someone's membership changing in a room affecting - `sliding_sync_membership_snapshots`. - - This way, if a row exists in the sliding sync tables, we are able to rely on it - (accurate data). And if a row doesn't exist, we use a fallback to get the same info - until the background updates fill in the rows or a new event comes in triggering it - to be fully inserted. 
- - FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the - foreground update for - `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - https://github.com/element-hq/synapse/issues/TODO) - """ - - _resolve_stale_data_in_sliding_sync_joined_rooms_table(txn) - _resolve_stale_data_in_sliding_sync_membership_snapshots_table(txn) - - -def _resolve_stale_data_in_sliding_sync_joined_rooms_table( - txn: LoggingTransaction, -) -> None: - """ - Clears stale/out-of-date entries from the `sliding_sync_joined_rooms` table and - kicks-off the background update to catch-up with what we missed while Synapse was - downgraded. - - See `_resolve_stale_data_in_sliding_sync_tables()` description above for more - context. - """ - - # Find the point when we stopped writing to the `sliding_sync_joined_rooms` table - txn.execute( - """ - SELECT event_stream_ordering - FROM sliding_sync_joined_rooms - ORDER BY event_stream_ordering DESC - LIMIT 1 - """, - ) - - # If we have nothing written to the `sliding_sync_joined_rooms` table, there is - # nothing to clean up - row = cast(Optional[Tuple[int]], txn.fetchone()) - max_stream_ordering_sliding_sync_joined_rooms_table = None - depends_on = None - if row is not None: - (max_stream_ordering_sliding_sync_joined_rooms_table,) = row - - txn.execute( - """ - SELECT room_id - FROM events - WHERE stream_ordering > ? - GROUP BY room_id - ORDER BY MAX(stream_ordering) ASC - """, - (max_stream_ordering_sliding_sync_joined_rooms_table,), - ) - - room_rows = txn.fetchall() - # No new events have been written to the `events` table since the last time we wrote - # to the `sliding_sync_joined_rooms` table so there is nothing to clean up. This is - # the expected normal scenario for people who have not downgraded their Synapse - # version. - if not room_rows: - return - - # 1000 is an arbitrary batch size with no testing - for chunk in batch_iter(room_rows, 1000): - # Handle updating the `sliding_sync_joined_rooms` table - # - # Clear out the stale data - DatabasePool.simple_delete_many_batch_txn( - txn, - table="sliding_sync_joined_rooms", - keys=("room_id",), - values=chunk, - ) - - # Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms - # that went stale and now need to be recalculated. - DatabasePool.simple_upsert_many_txn_native_upsert( - txn, - table="sliding_sync_joined_rooms_to_recalculate", - key_names=("room_id",), - key_values=chunk, - value_names=(), - # No value columns, therefore make a blank list so that the following - # zip() works correctly. - value_values=[() for x in range(len(chunk))], - ) - else: - # Re-run the `sliding_sync_joined_rooms_to_recalculate` prefill if there is - # nothing in the `sliding_sync_joined_rooms` table - DatabasePool.simple_upsert_txn_native_upsert( - txn, - table="background_updates", - keyvalues={ - "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE - }, - values={}, - # Only insert the row if it doesn't already exist. If it already exists, - # we're already working on it - insertion_values={ - "progress_json": "{}", - }, - ) - depends_on = ( - _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE - ) - - # Now kick-off the background update to catch-up with what we missed while Synapse - # was downgraded. - # - # We may need to catch-up on everything if we have nothing written to the - # `sliding_sync_joined_rooms` table yet. 
This could happen if someone had zero rooms - # on their server (so the normal background update completes), downgrade Synapse - # versions, join and create some new rooms, and upgrade again. - DatabasePool.simple_upsert_txn_native_upsert( - txn, - table="background_updates", - keyvalues={ - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE - }, - values={}, - # Only insert the row if it doesn't already exist. If it already exists, we will - # eventually fill in the rows we're trying to populate. - insertion_values={ - # Empty progress is expected since it's not used for this background update. - "progress_json": "{}", - # Wait for the prefill to finish - "depends_on": depends_on, - }, - ) - - -def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( - txn: LoggingTransaction, -) -> None: - """ - Clears stale/out-of-date entries from the `sliding_sync_membership_snapshots` table - and kicks-off the background update to catch-up with what we missed while Synapse - was downgraded. - - See `_resolve_stale_data_in_sliding_sync_tables()` description above for more - context. - """ - - # Find the point when we stopped writing to the `sliding_sync_membership_snapshots` table - txn.execute( - """ - SELECT event_stream_ordering - FROM sliding_sync_membership_snapshots - ORDER BY event_stream_ordering DESC - LIMIT 1 - """, - ) - - # If we have nothing written to the `sliding_sync_membership_snapshots` table, - # there is nothing to clean up - row = cast(Optional[Tuple[int]], txn.fetchone()) - max_stream_ordering_sliding_sync_membership_snapshots_table = None - if row is not None: - (max_stream_ordering_sliding_sync_membership_snapshots_table,) = row - - # XXX: Since `forgotten` is simply a flag on the `room_memberships` table that is - # set out-of-band, there is no way to tell whether it was set while Synapse was - # downgraded. The only thing the user can do is `/forget` again if they run into - # this. - # - # This only picks up changes to memberships. - txn.execute( - """ - SELECT user_id, room_id - FROM local_current_membership - WHERE event_stream_ordering > ? - ORDER BY event_stream_ordering ASC - """, - (max_stream_ordering_sliding_sync_membership_snapshots_table,), - ) - - membership_rows = txn.fetchall() - # No new events have been written to the `events` table since the last time we wrote - # to the `sliding_sync_membership_snapshots` table so there is nothing to clean up. - # This is the expected normal scenario for people who have not downgraded their - # Synapse version. - if not membership_rows: - return - - # 1000 is an arbitrary batch size with no testing - for chunk in batch_iter(membership_rows, 1000): - # Handle updating the `sliding_sync_membership_snapshots` table - # - DatabasePool.simple_delete_many_batch_txn( - txn, - table="sliding_sync_membership_snapshots", - keys=("user_id", "room_id"), - values=chunk, - ) - - # Now kick-off the background update to catch-up with what we missed while Synapse - # was downgraded. - # - # We may need to catch-up on everything if we have nothing written to the - # `sliding_sync_membership_snapshots` table yet. This could happen if someone had - # zero rooms on their server (so the normal background update completes), downgrade - # Synapse versions, join and create some new rooms, and upgrade again. 
- # - progress_json: JsonDict = {} - if max_stream_ordering_sliding_sync_membership_snapshots_table is not None: - progress_json["last_event_stream_ordering"] = ( - max_stream_ordering_sliding_sync_membership_snapshots_table - ) - - DatabasePool.simple_upsert_txn_native_upsert( - txn, - table="background_updates", - keyvalues={ - "update_name": _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE - }, - values={}, - # Only insert the row if it doesn't already exist. If it already exists, we will - # eventually fill in the rows we're trying to populate. - insertion_values={ - "progress_json": json_encoder.encode(progress_json), - }, - ) - def _apply_module_schemas( txn: Cursor, database_engine: BaseDatabaseEngine, config: HomeServerConfig diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 0770ea5e33e..f6a6796e7b4 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -33,8 +33,8 @@ from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.events import DeltaState -from synapse.storage.databases.main.events_bg_updates import _BackgroundUpdates -from synapse.storage.prepare_database import ( +from synapse.storage.databases.main.events_bg_updates import ( + _BackgroundUpdates, _resolve_stale_data_in_sliding_sync_joined_rooms_table, _resolve_stale_data_in_sliding_sync_membership_snapshots_table, ) From 6f9932d146fda288688da4b7a75352364fa9d26b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Aug 2024 14:13:52 +0100 Subject: [PATCH 130/142] Handle old rows with null event_stream_ordering column --- .../databases/main/events_bg_updates.py | 132 +++++++++++++----- 1 file changed, 99 insertions(+), 33 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 3603f466787..1f8905fefaf 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1828,38 +1828,84 @@ async def _sliding_sync_membership_snapshots_bg_update( """ Background update to populate the `sliding_sync_membership_snapshots` table. """ - last_event_stream_ordering = progress.get( - "last_event_stream_ordering", -(1 << 31) - ) + # We do this in two phases: a) the initial phase where we go through all + # room memberships, and then b) a second phase where we look at new + # memberships (this is to handle the case where we downgrade and then + # upgrade again). + # + # We have to do this as two phases (rather than just the second phase + # where we iterate on event_stream_ordering), as the + # `event_stream_ordering` column may have null values for old rows. + # Therefore we first do the set of historic rooms and *then* look at any + # new rows (which will have a non-null `event_stream_ordering`). + initial_phase = progress.get("initial_phase") + if initial_phase is None: + # If this is the first run, store the current max stream position. + # We know we will go through all memberships less than the current + # max in the initial phase. 
+ progress = { + "initial_phase": True, + "last_event_stream_ordering": self.get_room_max_stream_ordering(), + } + await self.db_pool.updates._background_update_progress( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + progress, + ) + initial_phase = True + + last_room_id = progress.get("last_room_id", "") + last_event_stream_ordering = progress["last_event_stream_ordering"] def _find_memberships_to_update_txn( txn: LoggingTransaction, ) -> List[Tuple[str, str, str, str, str, int, bool]]: # Fetch the set of event IDs that we want to update - # - # It's important to sort by `event_stream_ordering` *ascending* (oldest to - # newest) so that if we see that this background update in progress and want - # to start the catch-up process, we can safely assume that it will - # eventually get to the rooms we want to catch-up on anyway (see - # `_resolve_stale_data_in_sliding_sync_tables()`). - txn.execute( - """ - SELECT - c.room_id, - c.user_id, - e.sender, - c.event_id, - c.membership, - c.event_stream_ordering, - e.outlier - FROM local_current_membership as c - INNER JOIN events AS e USING (event_id) - WHERE event_stream_ordering > ? - ORDER BY event_stream_ordering ASC - LIMIT ? - """, - (last_event_stream_ordering, batch_size), - ) + + if initial_phase: + txn.execute( + """ + SELECT + c.room_id, + c.user_id, + e.sender, + c.event_id, + c.membership, + e.stream_ordering, + e.outlier + FROM local_current_membership as c + INNER JOIN events AS e USING (event_id) + WHERE c.room_id > ? + ORDER BY c.room_id ASC + LIMIT ? + """, + (last_room_id, batch_size), + ) + elif last_event_stream_ordering is not None: + # It's important to sort by `event_stream_ordering` *ascending* (oldest to + # newest) so that if we see that this background update in progress and want + # to start the catch-up process, we can safely assume that it will + # eventually get to the rooms we want to catch-up on anyway (see + # `_resolve_stale_data_in_sliding_sync_tables()`). + txn.execute( + """ + SELECT + c.room_id, + c.user_id, + e.sender, + c.event_id, + c.membership, + c.event_stream_ordering, + e.outlier + FROM local_current_membership as c + INNER JOIN events AS e USING (event_id) + WHERE event_stream_ordering > ? + ORDER BY event_stream_ordering ASC + LIMIT ? + """, + (last_event_stream_ordering, batch_size), + ) + else: + raise Exception("last_event_stream_ordering should not be None") memberships_to_update_rows = cast( List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() @@ -1873,10 +1919,22 @@ def _find_memberships_to_update_txn( ) if not memberships_to_update_rows: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE - ) - return 0 + if initial_phase: + # Move onto the next phase. + await self.db_pool.updates._background_update_progress( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, + { + "initial_phase": False, + "last_event_stream_ordering": last_event_stream_ordering, + }, + ) + return 0 + else: + # We've finished both phases, we're done. 
+ await self.db_pool.updates._end_background_update( + _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE + ) + return 0 def _find_previous_membership_txn( txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int @@ -2144,7 +2202,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: # Update the progress ( - _room_id, + room_id, _user_id, _sender, _membership_event_id, @@ -2152,9 +2210,16 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: membership_event_stream_ordering, _is_outlier, ) = memberships_to_update_rows[-1] + + progress = { + "initial_phase": initial_phase, + "last_room_id": room_id, + "last_event_stream_ordering": membership_event_stream_ordering, + } + await self.db_pool.updates._background_update_progress( _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, - {"last_event_stream_ordering": membership_event_stream_ordering}, + progress, ) return len(memberships_to_update_rows) @@ -2383,6 +2448,7 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table( # progress_json: JsonDict = {} if max_stream_ordering_sliding_sync_membership_snapshots_table is not None: + progress_json["initial_phase"] = False progress_json["last_event_stream_ordering"] = ( max_stream_ordering_sliding_sync_membership_snapshots_table ) From ab414f2ab8a294fbffb417003eeea0f14bbd6588 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Aug 2024 14:23:19 +0100 Subject: [PATCH 131/142] Use event_auth table to get previous membership --- .../databases/main/events_bg_updates.py | 31 +++++++------------ tests/storage/test_sliding_sync_tables.py | 10 +++--- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 1f8905fefaf..946d5ec65bd 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1937,33 +1937,27 @@ def _find_memberships_to_update_txn( return 0 def _find_previous_membership_txn( - txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + txn: LoggingTransaction, event_id: str, user_id: str ) -> Tuple[str, str]: - # Find the previous invite/knock event before the leave event + # Find the previous invite/knock event before the leave event. This + # is done by looking at the auth events of the invite/knock and + # finding the corresponding membership event. txn.execute( """ - SELECT event_id, membership - FROM room_memberships - WHERE - room_id = ? - AND user_id = ? - AND event_stream_ordering < ? - ORDER BY event_stream_ordering DESC - LIMIT 1 + SELECT m.event_id, m.membership + FROM event_auth AS a + INNER JOIN room_memberships AS m ON (a.auth_id = m.event_id) + WHERE a.event_id = ? AND m.user_id = ? """, - ( - room_id, - user_id, - stream_ordering, - ), + (event_id, user_id), ) row = txn.fetchone() # We should see a corresponding previous invite/knock event assert row is not None - event_id, membership = row + previous_event_id, membership = row - return event_id, membership + return previous_event_id, membership # Map from (room_id, user_id) to ... 
to_insert_membership_snapshots: Dict[ @@ -2057,9 +2051,8 @@ def _find_previous_membership_txn( await self.db_pool.runInteraction( "sliding_sync_membership_snapshots_bg_update._find_previous_membership", _find_previous_membership_txn, - room_id, + membership_event_id, user_id, - membership_event_stream_ordering, ) ) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index f6a6796e7b4..569863ab13a 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -270,9 +270,7 @@ def _create_remote_invite_room_for_user( return invite_room_id, persisted_event def _retract_remote_invite_for_user( - self, - user_id: str, - remote_room_id: str, + self, user_id: str, remote_room_id: str, invite_event_id: str ) -> EventBase: """ Create a fake invite retraction for a remote room and persist it. @@ -285,6 +283,7 @@ def _retract_remote_invite_for_user( user_id: The person who was invited and we're going to retract the invite for. remote_room_id: The room ID that the invite was for. + invite_event_id: The event ID of the invite Returns: The persisted leave (kick) event. @@ -298,7 +297,7 @@ def _retract_remote_invite_for_user( "origin_server_ts": 1, "type": EventTypes.Member, "content": {"membership": Membership.LEAVE}, - "auth_events": [], + "auth_events": [invite_event_id], "prev_events": [], } @@ -2202,6 +2201,7 @@ def test_non_join_retracted_remote_invite(self) -> None: remote_invite_retraction_event = self._retract_remote_invite_for_user( user_id=user1_id, remote_room_id=remote_invite_room_id, + invite_event_id=remote_invite_event.event_id, ) # No one local is joined to the remote room @@ -3549,6 +3549,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret room_id_no_info_leave_event = self._retract_remote_invite_for_user( user_id=user1_id, remote_room_id=room_id_no_info, + invite_event_id=room_id_no_info_invite_event.event_id, ) room_id_with_info_leave_event_response = self.helper.leave( room_id_with_info, user1_id, tok=user1_tok @@ -3556,6 +3557,7 @@ def test_membership_snapshots_background_update_remote_invite_rejections_and_ret space_room_id_leave_event = self._retract_remote_invite_for_user( user_id=user1_id, remote_room_id=space_room_id, + invite_event_id=space_room_id_invite_event.event_id, ) # Clean-up the `sliding_sync_membership_snapshots` table as if the inserts did not From 90d0e035dd872150c5a99a42659f900f0a00949d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Aug 2024 14:14:09 +0100 Subject: [PATCH 132/142] Fix port script tests by handling empty DBs correctly --- synapse/storage/databases/main/events_bg_updates.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 946d5ec65bd..6b080f76782 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -2323,6 +2323,12 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table( value_values=[() for x in range(len(chunk))], ) else: + txn.execute("SELECT 1 FROM local_current_membership LIMIT 1") + row = txn.fetchone() + if row is None: + # There are no rooms, so don't schedule the bg update. 
+            return
+
         # Re-run the `sliding_sync_joined_rooms_to_recalculate` prefill if there is
         # nothing in the `sliding_sync_joined_rooms` table
         DatabasePool.simple_upsert_txn_native_upsert(
@@ -2430,6 +2436,12 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table(
             keys=("user_id", "room_id"),
             values=chunk,
         )
+    else:
+        txn.execute("SELECT 1 FROM local_current_membership LIMIT 1")
+        row = txn.fetchone()
+        if row is None:
+            # There are no rooms, so don't schedule the bg update.
+            return
 
     # Now kick-off the background update to catch-up with what we missed while Synapse
     # was downgraded.

From 6a164eb6a5657c838430960dbb6ecab3f99ada27 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 28 Aug 2024 11:09:18 -0500
Subject: [PATCH 133/142] Clarify why we return early

---
 synapse/storage/databases/main/events_bg_updates.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 6b080f76782..d7cc7f11a2b 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -2323,6 +2323,10 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table(
             value_values=[() for x in range(len(chunk))],
         )
     else:
+        # Avoid adding the background updates when there is no data to run them on (if
+        # the homeserver has no rooms). The portdb script refuses to run with pending
+        # background updates and since we potentially add them every time the server
+        # starts, we add this check to allow the script to breathe.
         txn.execute("SELECT 1 FROM local_current_membership LIMIT 1")
         row = txn.fetchone()
         if row is None:
@@ -2437,6 +2441,10 @@ def _resolve_stale_data_in_sliding_sync_membership_snapshots_table(
             values=chunk,
         )
     else:
+        # Avoid adding the background updates when there is no data to run them on (if
+        # the homeserver has no rooms). The portdb script refuses to run with pending
+        # background updates and since we potentially add them every time the server
+        # starts, we add this check to allow the script to breathe.
         txn.execute("SELECT 1 FROM local_current_membership LIMIT 1")
         row = txn.fetchone()
         if row is None:

From 3f56efb905587a1688c28760c7784c3bc267218f Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 28 Aug 2024 11:17:24 -0500
Subject: [PATCH 134/142] Note why it's there

---
 synapse/storage/databases/main/events_bg_updates.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index d7cc7f11a2b..f171f84fddb 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -327,6 +327,9 @@ def __init__(
             self._sliding_sync_membership_snapshots_bg_update,
         )
 
+        # We want this to run on the main database at startup before we start processing
+        # events.
+ # # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the # foreground update for # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by From e12f95fd39a2c425669f9bb1bc7aff21463f471b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 11:18:09 -0500 Subject: [PATCH 135/142] Update metric with how long it took with the index and upsert --- synapse/storage/databases/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index f171f84fddb..5060a01a2fd 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1586,7 +1586,7 @@ async def _sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update( def _txn(txn: LoggingTransaction) -> None: # We do this as one big bulk insert. This has been tested on a bigger - # homeserver with ~10M rooms and took 11s. There is potential for this to + # homeserver with ~10M rooms and took 60s. There is potential for this to # starve disk usage while this goes on. # # We upsert in case we have to run this multiple times. From 116b8339265cb212ca0cfc56878418f315d9f012 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 11:37:50 -0500 Subject: [PATCH 136/142] Ignore membership where we don't have a row in `rooms` table --- synapse/storage/databases/main/events_bg_updates.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 5060a01a2fd..68f111d47f2 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1865,6 +1865,13 @@ def _find_memberships_to_update_txn( # Fetch the set of event IDs that we want to update if initial_phase: + # There are some old out-of-band memberships (before + # https://github.com/matrix-org/synapse/issues/6983) where we don't have + # the corresponding room stored in the `rooms` table`. We use `INNER + # JOIN rooms USING (room_id)` to ignore those events because we have a + # `FOREIGN KEY` constraint on the `sliding_sync_membership_snapshots` + # table. This means we will be ignoring invites/invite-rejections from + # before 2020 but that's probably *fine*. txn.execute( """ SELECT @@ -1877,6 +1884,7 @@ def _find_memberships_to_update_txn( e.outlier FROM local_current_membership as c INNER JOIN events AS e USING (event_id) + INNER JOIN rooms USING (room_id) WHERE c.room_id > ? ORDER BY c.room_id ASC LIMIT ? 
From de30d19b961f6288ad288ee32d42f686a7270dc1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 12:21:28 -0500 Subject: [PATCH 137/142] Fix-up `rooms` table for old out-of-band memberships --- .../databases/main/events_bg_updates.py | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 68f111d47f2..d1b68493b04 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1861,21 +1861,20 @@ async def _sliding_sync_membership_snapshots_bg_update( def _find_memberships_to_update_txn( txn: LoggingTransaction, - ) -> List[Tuple[str, str, str, str, str, int, bool]]: + ) -> List[Tuple[str, Optional[str], str, str, str, str, int, bool]]: # Fetch the set of event IDs that we want to update if initial_phase: # There are some old out-of-band memberships (before # https://github.com/matrix-org/synapse/issues/6983) where we don't have - # the corresponding room stored in the `rooms` table`. We use `INNER - # JOIN rooms USING (room_id)` to ignore those events because we have a - # `FOREIGN KEY` constraint on the `sliding_sync_membership_snapshots` - # table. This means we will be ignoring invites/invite-rejections from - # before 2020 but that's probably *fine*. + # the corresponding room stored in the `rooms` table`. We use `LEFT JOIN + # rooms AS r USING (room_id)` to find the rooms missing from `rooms` and + # insert a row for them below. txn.execute( """ SELECT c.room_id, + r.room_id, c.user_id, e.sender, c.event_id, @@ -1884,7 +1883,7 @@ def _find_memberships_to_update_txn( e.outlier FROM local_current_membership as c INNER JOIN events AS e USING (event_id) - INNER JOIN rooms USING (room_id) + LEFT JOIN rooms AS r USING (room_id) WHERE c.room_id > ? ORDER BY c.room_id ASC LIMIT ? @@ -1897,9 +1896,14 @@ def _find_memberships_to_update_txn( # to start the catch-up process, we can safely assume that it will # eventually get to the rooms we want to catch-up on anyway (see # `_resolve_stale_data_in_sliding_sync_tables()`). + # + # `c.room_id` is duplicated to make it match what we're doing in the + # `initial_phase`. But we can avoid doing the extra `rooms` table join + # because we can assume all of these new events won't have this problem. txn.execute( """ SELECT + c.room_id, c.room_id, c.user_id, e.sender, @@ -1919,7 +1923,8 @@ def _find_memberships_to_update_txn( raise Exception("last_event_stream_ordering should not be None") memberships_to_update_rows = cast( - List[Tuple[str, str, str, str, str, int, bool]], txn.fetchall() + List[Tuple[str, Optional[str], str, str, str, str, int, bool]], + txn.fetchall(), ) return memberships_to_update_rows @@ -1979,6 +1984,7 @@ def _find_previous_membership_txn( ) for ( room_id, + room_id_from_rooms_table, user_id, sender, membership_event_id, @@ -1996,6 +2002,26 @@ def _find_previous_membership_txn( Membership.BAN, ) + # There are some old out-of-band memberships (before + # https://github.com/matrix-org/synapse/issues/6983) where we don't have the + # corresponding room stored in the `rooms` table`. We have a `FOREIGN KEY` + # constraint on the `sliding_sync_membership_snapshots` table so we have to + # fix-up these memberships by adding the room to the `rooms` table. 
+ if room_id_from_rooms_table is None: + await self.db_pool.simple_insert( + table="rooms", + values={ + "room_id": room_id, + # Only out-of-band memberships are missing from the `rooms` + # table so that is the only type of membership we're dealing + # with here. Since we don't calculate the "chain cover" for + # out-of-band memberships, we can just set this to `True` as if + # the user ever joins the room, we will end up calculating the + # "chain cover" anyway. + "has_auth_chain_index": True, + }, + ) + # Map of values to insert/update in the `sliding_sync_membership_snapshots` table sliding_sync_membership_snapshots_insert_map: ( SlidingSyncMembershipSnapshotSharedInsertValues @@ -2207,6 +2233,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: # Update the progress ( room_id, + _room_id_from_rooms_table, _user_id, _sender, _membership_event_id, From 15ad7a7950773666a8a05528bdaf17fd58ad5d9f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 16:40:16 -0500 Subject: [PATCH 138/142] Fill in placeholder issue --- synapse/storage/databases/main/events_bg_updates.py | 4 ++-- .../schema/main/delta/87/01_sliding_sync_memberships.sql | 5 +++++ tests/storage/test_sliding_sync_tables.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index d1b68493b04..605e1c6b85f 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -333,7 +333,7 @@ def __init__( # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the # foreground update for # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - # https://github.com/element-hq/synapse/issues/TODO) + # https://github.com/element-hq/synapse/issues/17623) with db_conn.cursor(txn_name="resolve_sliding_sync") as txn: _resolve_stale_data_in_sliding_sync_tables( txn=txn, @@ -2280,7 +2280,7 @@ def _resolve_stale_data_in_sliding_sync_tables( FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - https://github.com/element-hq/synapse/issues/TODO) + https://github.com/element-hq/synapse/issues/17623) """ _resolve_stale_data_in_sliding_sync_joined_rooms_table(txn) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 71539e6bd7d..ed39ef6ce55 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -16,6 +16,11 @@ -- during the background update to populate `sliding_sync_joined_rooms` which works but -- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many -- state events there are for each room. 
+-- +-- FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the +-- foreground update for +-- `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by +-- https://github.com/element-hq/synapse/issues/17623) CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms_to_recalculate( room_id TEXT NOT NULL REFERENCES rooms(room_id), PRIMARY KEY (room_id) diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index 569863ab13a..b27cea0eb9f 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -4214,7 +4214,7 @@ class SlidingSyncTablesCatchUpBackgroundUpdatesTestCase(SlidingSyncTablesTestCas FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the foreground update for `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by - https://github.com/element-hq/synapse/issues/TODO) + https://github.com/element-hq/synapse/issues/17623) """ def test_joined_background_update_catch_up_new_room(self) -> None: From 2f6ee08cf4f4ad866ca5e0be3e0566752b551f40 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 17:21:59 -0500 Subject: [PATCH 139/142] Fix join condition not working in Postgres ``` common column name "room_id" appears more than once in left table ``` --- synapse/storage/databases/main/events_bg_updates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 605e1c6b85f..89c59df1ef4 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1881,9 +1881,9 @@ def _find_memberships_to_update_txn( c.membership, e.stream_ordering, e.outlier - FROM local_current_membership as c + FROM local_current_membership AS c INNER JOIN events AS e USING (event_id) - LEFT JOIN rooms AS r USING (room_id) + LEFT JOIN rooms AS r ON (c.room_id = r.room_id) WHERE c.room_id > ? ORDER BY c.room_id ASC LIMIT ? @@ -1911,7 +1911,7 @@ def _find_memberships_to_update_txn( c.membership, c.event_stream_ordering, e.outlier - FROM local_current_membership as c + FROM local_current_membership AS c INNER JOIN events AS e USING (event_id) WHERE event_stream_ordering > ? 
ORDER BY event_stream_ordering ASC From 6622a1c2bbdce41d644299c405d299b9d14df6d6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 17:51:06 -0500 Subject: [PATCH 140/142] Add `instance_name` to `sliding_sync_membership_snapshots` So we can craft `PersistedEventPosition(...)` --- synapse/storage/databases/main/events.py | 12 ++++++++++- .../databases/main/events_bg_updates.py | 21 +++++++++++++++++-- .../delta/87/01_sliding_sync_memberships.sql | 3 +++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f8d176d1333..07805725867 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -164,6 +164,7 @@ class SlidingSyncMembershipInfo: membership_event_id: str membership: str membership_event_stream_ordering: int + membership_event_instance_name: str @attr.s(slots=True, auto_attribs=True) @@ -457,11 +458,15 @@ async def _calculate_sliding_sync_table_changes( membership_event_id, user_id, ) in membership_event_id_to_user_id_map.items(): - # We should only be seeing events with stream_ordering assigned by this point + # We should only be seeing events with `stream_ordering`/`instance_name` assigned by this point membership_event_stream_ordering = membership_event_map[ membership_event_id ].internal_metadata.stream_ordering assert membership_event_stream_ordering is not None + membership_event_instance_name = membership_event_map[ + membership_event_id + ].internal_metadata.instance_name + assert membership_event_instance_name is not None membership_infos_to_insert_membership_snapshots.append( SlidingSyncMembershipInfo( @@ -470,6 +475,7 @@ async def _calculate_sliding_sync_table_changes( membership_event_id=membership_event_id, membership=membership_event_map[membership_event_id].membership, membership_event_stream_ordering=membership_event_stream_ordering, + membership_event_instance_name=membership_event_instance_name, ) ) @@ -1811,6 +1817,7 @@ def _update_current_state_txn( "membership_event_id", "membership", "event_stream_ordering", + "event_instance_name", ] + list( sliding_sync_table_changes.membership_snapshot_shared_insert_values.keys() @@ -1821,6 +1828,7 @@ def _update_current_state_txn( membership_info.membership_event_id, membership_info.membership, membership_info.membership_event_stream_ordering, + membership_info.membership_event_instance_name, ] + list( sliding_sync_table_changes.membership_snapshot_shared_insert_values.values() @@ -2755,6 +2763,7 @@ def _store_room_members_txn( for event in events: # Sanity check that we're working with persisted events assert event.internal_metadata.stream_ordering is not None + assert event.internal_metadata.instance_name is not None # We update the local_current_membership table only if the event is # "current", i.e., its something that has just happened. @@ -2805,6 +2814,7 @@ def _store_room_members_txn( "membership_event_id": event.event_id, "membership": event.membership, "event_stream_ordering": event.internal_metadata.stream_ordering, + "event_instance_name": event.internal_metadata.instance_name, } if event.membership == Membership.LEAVE: # Inherit the meta data from the remote invite/knock. 
When using diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 89c59df1ef4..1a3b0d1822c 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1861,7 +1861,9 @@ async def _sliding_sync_membership_snapshots_bg_update( def _find_memberships_to_update_txn( txn: LoggingTransaction, - ) -> List[Tuple[str, Optional[str], str, str, str, str, int, bool]]: + ) -> List[ + Tuple[str, Optional[str], str, str, str, str, int, Optional[str], bool] + ]: # Fetch the set of event IDs that we want to update if initial_phase: @@ -1880,6 +1882,7 @@ def _find_memberships_to_update_txn( c.event_id, c.membership, e.stream_ordering, + e.instance_name, e.outlier FROM local_current_membership AS c INNER JOIN events AS e USING (event_id) @@ -1910,6 +1913,7 @@ def _find_memberships_to_update_txn( c.event_id, c.membership, c.event_stream_ordering, + e.instance_name, e.outlier FROM local_current_membership AS c INNER JOIN events AS e USING (event_id) @@ -1923,7 +1927,11 @@ def _find_memberships_to_update_txn( raise Exception("last_event_stream_ordering should not be None") memberships_to_update_rows = cast( - List[Tuple[str, Optional[str], str, str, str, str, int, bool]], + List[ + Tuple[ + str, Optional[str], str, str, str, str, int, Optional[str], bool + ] + ], txn.fetchall(), ) @@ -1990,6 +1998,7 @@ def _find_previous_membership_txn( membership_event_id, membership, membership_event_stream_ordering, + membership_event_instance_name, is_outlier, ) in memberships_to_update_rows: # We don't know how to handle `membership` values other than these. The @@ -2174,6 +2183,9 @@ def _find_previous_membership_txn( membership_event_id=membership_event_id, membership=membership, membership_event_stream_ordering=membership_event_stream_ordering, + # If instance_name is null we default to "master" + membership_event_instance_name=membership_event_instance_name + or "master", ) def _fill_table_txn(txn: LoggingTransaction) -> None: @@ -2188,6 +2200,9 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: membership_event_stream_ordering = ( membership_info.membership_event_stream_ordering ) + membership_event_instance_name = ( + membership_info.membership_event_instance_name + ) # We don't need to upsert the state because we never partially # insert/update the snapshots and anything already there is up-to-date @@ -2208,6 +2223,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: "membership_event_id": membership_event_id, "membership": membership, "event_stream_ordering": membership_event_stream_ordering, + "event_instance_name": membership_event_instance_name, }, ) # We need to find the `forgotten` value during the transaction because @@ -2239,6 +2255,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: _membership_event_id, _membership, membership_event_stream_ordering, + _membership_event_instance_name, _is_outlier, ) = memberships_to_update_rows[-1] diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index ed39ef6ce55..60573cda487 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -100,6 +100,9 @@ CREATE TABLE IF NOT EXISTS sliding_sync_membership_snapshots( forgotten INTEGER DEFAULT 0 NOT NULL, -- `stream_ordering` of the `membership_event_id` event_stream_ordering 
BIGINT NOT NULL REFERENCES events(stream_ordering), + -- `instance_name` of the worker that persisted the `membership_event_id`. + -- Useful for crafting `PersistedEventPosition(...)` + event_instance_name TEXT NOT NULL, -- For remote invites/knocks that don't include any stripped state, we want to be -- able to distinguish between a room with `None` as valid value for some state and -- room where the state is completely unknown. Basically, this should be True unless From bcc3e50a125e37b0f9301913f44bb73191f78488 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 19:20:31 -0500 Subject: [PATCH 141/142] Fully-insert `sliding_sync_joined_rooms` rows This way if the row exists, we can rely on the information in it. And only use a fallback for rows that don't exist. --- synapse/storage/databases/main/events.py | 57 +++- .../databases/main/events_bg_updates.py | 12 +- tests/storage/test_sliding_sync_tables.py | 259 ++++++++++-------- 3 files changed, 206 insertions(+), 122 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 07805725867..60c92e58041 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -181,6 +181,16 @@ class SlidingSyncTableChanges: # This should be *some* value that points to a real event in the room if we are # still joined to the room and some state is changing (`to_insert` or `to_delete`). joined_room_best_effort_most_recent_stream_ordering: Optional[int] + # If the row doesn't exist in the `sliding_sync_joined_rooms` table, we need to + # fully-insert it which means we also need to include a `bump_stamp` value to use + # for the row. This should only be populated when we're trying to fully-insert a + # row. + # + # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + # foreground update for + # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + # https://github.com/element-hq/synapse/issues/17623) + joined_room_bump_stamp_to_fully_insert: Optional[int] # Values to upsert into `sliding_sync_joined_rooms` joined_room_updates: SlidingSyncStateInsertValues @@ -401,6 +411,7 @@ async def _calculate_sliding_sync_table_changes( return SlidingSyncTableChanges( room_id=room_id, joined_room_best_effort_most_recent_stream_ordering=None, + joined_room_bump_stamp_to_fully_insert=None, joined_room_updates={}, membership_snapshot_shared_insert_values={}, to_insert_membership_snapshots=[], @@ -558,11 +569,49 @@ async def _calculate_sliding_sync_table_changes( # joined_room_updates: SlidingSyncStateInsertValues = {} best_effort_most_recent_stream_ordering: Optional[int] = None + bump_stamp_to_fully_insert: Optional[int] = None if not delta_state.no_longer_in_room: + current_state_ids_map = {} + + # Always fully-insert rows if they don't already exist in the + # `sliding_sync_joined_rooms` table. This way we can rely on a row if it + # exists in the table. 
+ # + # FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + # foreground update for + # `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + # https://github.com/element-hq/synapse/issues/17623) + existing_row_in_table = await self.store.db_pool.simple_select_one_onecol( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + retcol="room_id", + allow_none=True, + ) + if not existing_row_in_table: + most_recent_bump_event_pos_results = ( + await self.store.get_last_event_pos_in_room( + room_id, + event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES, + ) + ) + bump_stamp_to_fully_insert = ( + most_recent_bump_event_pos_results[1].stream + if most_recent_bump_event_pos_results is not None + else None + ) + + current_state_ids_map = dict( + await self.store.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + SLIDING_SYNC_RELEVANT_STATE_SET + ), + ) + ) + # Look through the items we're going to insert into the current state to see # if there is anything that we care about and should also update in the # `sliding_sync_joined_rooms` table. - current_state_ids_map = {} for state_key, event_id in to_insert.items(): if state_key in SLIDING_SYNC_RELEVANT_STATE_SET: current_state_ids_map[state_key] = event_id @@ -654,6 +703,7 @@ async def _calculate_sliding_sync_table_changes( room_id=room_id, # For `sliding_sync_joined_rooms` joined_room_best_effort_most_recent_stream_ordering=best_effort_most_recent_stream_ordering, + joined_room_bump_stamp_to_fully_insert=bump_stamp_to_fully_insert, joined_room_updates=joined_room_updates, # For `sliding_sync_membership_snapshots` membership_snapshot_shared_insert_values=membership_snapshot_shared_insert_values, @@ -1743,7 +1793,10 @@ def _update_current_state_txn( # better to just rely on # `_update_sliding_sync_tables_with_new_persisted_events_txn()` # to do the right thing (same for `bump_stamp`). - "event_stream_ordering": sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering + "event_stream_ordering": sliding_sync_table_changes.joined_room_best_effort_most_recent_stream_ordering, + # If we're trying to fully-insert a row, we need to provide a + # value for `bump_stamp` if it exists for the room. + "bump_stamp": sliding_sync_table_changes.joined_room_bump_stamp_to_fully_insert, }, ) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 1a3b0d1822c..88ff5aa2df4 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1758,7 +1758,8 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: ) bump_stamp = joined_room_stream_ordering_update.most_recent_bump_stamp - # Check if the current state has been updated since we gathered it + # Check if the current state has been updated since we gathered it. + # We're being careful not to insert/overwrite with stale data. state_deltas_since_we_gathered_current_state = ( self.get_current_state_deltas_for_room_txn( txn, @@ -1787,17 +1788,16 @@ def _fill_table_txn(txn: LoggingTransaction) -> None: + "Raising exception so we can just try again." ) - # Since we partially update the `sliding_sync_joined_rooms` as new state - # is sent, we need to update the state fields `ON CONFLICT`. We just - # have to be careful we're not overwriting it with stale data (see - # `most_recent_current_state_delta_stream_id` check above). 
+ # Since we fully insert rows into `sliding_sync_joined_rooms`, we can + # just do everything on insert and `ON CONFLICT DO NOTHING`. # self.db_pool.simple_upsert_txn( txn, table="sliding_sync_joined_rooms", keyvalues={"room_id": room_id}, - values=update_map, + values={}, insertion_values={ + **update_map, # The reason we're only *inserting* (not *updating*) `event_stream_ordering` # and `bump_stamp` is because if they are present, that means they are already # up-to-date. diff --git a/tests/storage/test_sliding_sync_tables.py b/tests/storage/test_sliding_sync_tables.py index b27cea0eb9f..4be098b6f64 100644 --- a/tests/storage/test_sliding_sync_tables.py +++ b/tests/storage/test_sliding_sync_tables.py @@ -1108,6 +1108,151 @@ def test_joined_room_meta_state_reset(self) -> None: user2_snapshot, ) + def test_joined_room_fully_insert_on_state_update(self) -> None: + """ + Test that when an existing room updates it's state and we don't have a + corresponding row in `sliding_sync_joined_rooms` yet, we fully-insert the row + even though only a tiny piece of state changed. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/17623) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_joined_rooms` table as if the the room never made + # it into the table. This is to simulate an existing room (before we event added + # the sliding sync tables) not being in the `sliding_sync_joined_rooms` table + # yet. + self.get_success( + self.store.db_pool.simple_delete( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + desc="simulate existing room not being in the sliding_sync_joined_rooms table yet", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Encrypt the room + self.helper.send_state( + room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + # The room should now be in the `sliding_sync_joined_rooms` table + # (fully-inserted with all of the state values). 
+ sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + {room_id}, + exact=True, + ) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id) + ) + self.assertEqual( + sliding_sync_joined_rooms_results[room_id], + _SlidingSyncJoinedRoomResult( + room_id=room_id, + # This should be whatever is the last event in the room + event_stream_ordering=state_map[ + (EventTypes.RoomEncryption, "") + ].internal_metadata.stream_ordering, + bump_stamp=state_map[ + (EventTypes.Create, "") + ].internal_metadata.stream_ordering, + room_type=None, + room_name="my super duper room", + is_encrypted=True, + tombstone_successor_room_id=None, + ), + ) + + def test_joined_room_nothing_if_not_in_table_when_bumped(self) -> None: + """ + Test a new message being sent in an existing room when we don't have a + corresponding row in `sliding_sync_joined_rooms` yet; either nothing should + happen or we should fully-insert the row. We currently do nothing. + + FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the + foreground update for + `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by + https://github.com/element-hq/synapse/issues/17623) + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Add a room name + self.helper.send_state( + room_id, + EventTypes.Name, + {"name": "my super duper room"}, + tok=user1_tok, + ) + # Encrypt the room + self.helper.send_state( + room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + # Clean-up the `sliding_sync_joined_rooms` table as if the the room never made + # it into the table. This is to simulate an existing room (before we event added + # the sliding sync tables) not being in the `sliding_sync_joined_rooms` table + # yet. + self.get_success( + self.store.db_pool.simple_delete( + table="sliding_sync_joined_rooms", + keyvalues={"room_id": room_id}, + desc="simulate existing room not being in the sliding_sync_joined_rooms table yet", + ) + ) + + # We shouldn't find anything in the table because we just deleted them in + # preparation for the test. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + + # Send a new message to bump the room + self.helper.send(room_id, "some message", tok=user1_tok) + + # Either nothing should happen or we should fully-insert the row. We currently + # do nothing for non-state events. + sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() + self.assertIncludes( + set(sliding_sync_joined_rooms_results.keys()), + set(), + exact=True, + ) + def test_non_join_space_room_with_info(self) -> None: """ Test users who was invited shows up in `sliding_sync_membership_snapshots`. @@ -2750,120 +2895,6 @@ def test_joined_background_update_missing(self) -> None: ), ) - def test_joined_background_update_partial(self) -> None: - """ - Test that the background update for `sliding_sync_joined_rooms` populates - partially updated rows. 
- """ - user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") - - # Create rooms with various levels of state that should appear in the table - # - room_id_with_info = self.helper.create_room_as(user1_id, tok=user1_tok) - # Add a room name - self.helper.send_state( - room_id_with_info, - EventTypes.Name, - {"name": "my super duper room"}, - tok=user1_tok, - ) - # Encrypt the room - self.helper.send_state( - room_id_with_info, - EventTypes.RoomEncryption, - {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, - tok=user1_tok, - ) - - state_map = self.get_success( - self.storage_controllers.state.get_current_state(room_id_with_info) - ) - - # Clean-up the `sliding_sync_joined_rooms` table as if the the encryption event - # never made it into the table. - self.get_success( - self.store.db_pool.simple_update( - table="sliding_sync_joined_rooms", - keyvalues={"room_id": room_id_with_info}, - updatevalues={"is_encrypted": False}, - desc="sliding_sync_joined_rooms.test_joined_background_update_partial", - ) - ) - - # We should see the partial row that we made in preparation for the test. - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id_with_info}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_with_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_with_info, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomEncryption, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=False, - tombstone_successor_room_id=None, - ), - ) - - # Insert and run the background update. 
- self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - "progress_json": "{}", - }, - ) - ) - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE, - "progress_json": "{}", - "depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE, - }, - ) - ) - self.store.db_pool.updates._all_done = False - self.wait_for_background_updates() - - # Make sure the table is populated - sliding_sync_joined_rooms_results = self._get_sliding_sync_joined_rooms() - self.assertIncludes( - set(sliding_sync_joined_rooms_results.keys()), - {room_id_with_info}, - exact=True, - ) - self.assertEqual( - sliding_sync_joined_rooms_results[room_id_with_info], - _SlidingSyncJoinedRoomResult( - room_id=room_id_with_info, - # Lastest event sent in the room - event_stream_ordering=state_map[ - (EventTypes.RoomEncryption, "") - ].internal_metadata.stream_ordering, - bump_stamp=state_map[ - (EventTypes.Create, "") - ].internal_metadata.stream_ordering, - room_type=None, - room_name="my super duper room", - is_encrypted=True, - tombstone_successor_room_id=None, - ), - ) - def test_membership_snapshots_background_update_joined(self) -> None: """ Test that the background update for `sliding_sync_membership_snapshots` From b63188c65b829c33f001376585ad0e5251f86a05 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 28 Aug 2024 19:35:59 -0500 Subject: [PATCH 142/142] Explain more in schema --- .../schema/main/delta/87/01_sliding_sync_memberships.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql index 60573cda487..2f71e541f8d 100644 --- a/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql +++ b/synapse/storage/schema/main/delta/87/01_sliding_sync_memberships.sql @@ -17,6 +17,12 @@ -- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many -- state events there are for each room. -- +-- This table is prefilled with every room in the `rooms` table (see the +-- `sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update` background +-- update). This table is also updated whenever we come across stale data so that we can +-- catch-up with all of the new data if Synapse was downgraded (see +-- `_resolve_stale_data_in_sliding_sync_tables`). +-- -- FIXME: This can be removed once we bump `SCHEMA_COMPAT_VERSION` and run the -- foreground update for -- `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked by @@ -150,6 +156,8 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream -- 1. Add a background update to prefill `sliding_sync_joined_rooms_to_recalculate`. -- We do a one-shot bulk insert from the `rooms` table to prefill. -- 2. Add a background update to populate the new `sliding_sync_joined_rooms` table +-- based on the rooms listed in the `sliding_sync_joined_rooms_to_recalculate` +-- table. -- INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}');
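
The two-phase flow introduced in [PATCH 130/142] is easier to see outside of a diff. The following is a minimal, self-contained Python sketch of that progress handling (illustrative only, not Synapse code: `run_batch`, `Row`, and the in-memory membership list are invented for the example). Phase a) walks every membership ordered by `room_id`, because historic rows may have a NULL `event_stream_ordering`; phase b) then catches up on anything written after the stream position recorded when the update was first scheduled.

from typing import Dict, List, Optional, Tuple

# (room_id, event_stream_ordering) -- the stream ordering may be None for old rows
Row = Tuple[str, Optional[int]]


def run_batch(
    progress: Dict[str, object],
    all_memberships: List[Row],
    batch_size: int = 2,
) -> Tuple[int, Optional[Dict[str, object]]]:
    """Process one batch; return (rows handled, next progress dict or None when done)."""
    if "initial_phase" not in progress:
        # First run: remember the current max stream position. Everything at or below
        # this position will be covered by the room-id ordered initial phase.
        max_pos = max((p for _, p in all_memberships if p is not None), default=0)
        progress = {
            "initial_phase": True,
            "last_room_id": "",
            "last_event_stream_ordering": max_pos,
        }

    if progress["initial_phase"]:
        # Phase a): iterate by room_id, since old rows may have a NULL stream ordering
        # and so cannot be ordered by stream position.
        last_room_id = progress["last_room_id"]
        batch = sorted(
            (r for r in all_memberships if r[0] > last_room_id),
            key=lambda r: r[0],
        )[:batch_size]
        if not batch:
            # Initial phase exhausted: switch to the stream-ordering catch-up phase.
            return 0, {**progress, "initial_phase": False}
        return len(batch), {**progress, "last_room_id": batch[-1][0]}

    # Phase b): only rows written after the update was first scheduled, all of which
    # have a stream ordering, iterated oldest to newest.
    last_pos = progress["last_event_stream_ordering"]
    batch = sorted(
        (r for r in all_memberships if r[1] is not None and r[1] > last_pos),
        key=lambda r: r[1],
    )[:batch_size]
    if not batch:
        return 0, None  # both phases finished
    return len(batch), {**progress, "last_event_stream_ordering": batch[-1][1]}


# Example: two pre-existing rows (one with no stream ordering); a third row arrives
# while the update is running and is picked up by the catch-up phase.
memberships: List[Row] = [("!a:hs", None), ("!b:hs", 10)]
progress: Optional[Dict[str, object]] = {}
handled, progress = run_batch(progress, memberships)
memberships.append(("!c:hs", 42))
while progress is not None:
    handled, progress = run_batch(progress, memberships)

Because the real update upserts each snapshot row, a membership that happens to be visited by both phases is simply written again with the same values, so the hand-off between phases only needs to remember a single stream position.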