From 867361e2dea28c81599e3485cc33ac479bc62050 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <richard@matrix.org> Date: Mon, 15 Feb 2021 18:34:18 +0000 Subject: [PATCH 1/5] Populate `internal_metadata.outlier` based on `events` table Rather than relying on `outlier` being in the `internal_metadata` column, populate it based on the `events.outlier` column. --- synapse/storage/databases/main/events_worker.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index edbe42f2bf6c..7389a4766cb1 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -799,6 +799,7 @@ async def _get_events_from_db(self, event_ids, allow_rejected=False): rejected_reason=rejected_reason, ) original_ev.internal_metadata.stream_ordering = row["stream_ordering"] + original_ev.internal_metadata.outlier = row["outlier"] event_map[event_id] = original_ev @@ -905,7 +906,8 @@ def _fetch_event_rows(self, txn, event_ids): ej.json, ej.format_version, r.room_version, - rej.reason + rej.reason, + e.outlier FROM events AS e JOIN event_json AS ej USING (event_id) LEFT JOIN rooms r ON r.room_id = e.room_id @@ -929,6 +931,7 @@ def _fetch_event_rows(self, txn, event_ids): "room_version_id": row[5], "rejected_reason": row[6], "redactions": [], + "outlier": row[7], } # check for redactions From 3cd4512d878e96dbdfe4f35d277e2682ed201252 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <richard@matrix.org> Date: Mon, 15 Feb 2021 18:43:58 +0000 Subject: [PATCH 2/5] Move `outlier` out of InternalMetadata._dict Ultimately, this will allow us to stop writing it to the database. For now, we have to grandfather it back in so as to maintain compatibility with older versions of Synapse. --- synapse/events/__init__.py | 9 ++++++--- synapse/events/utils.py | 2 ++ synapse/replication/http/federation.py | 3 +++ synapse/replication/http/send_event.py | 4 ++++ synapse/storage/databases/main/events.py | 19 +++++++++++++++++-- 5 files changed, 32 insertions(+), 5 deletions(-) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 3ec4120f85c2..8f6b955d17b7 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -98,7 +98,7 @@ def __get__(self, instance, owner=None): class _EventInternalMetadata: - __slots__ = ["_dict", "stream_ordering"] + __slots__ = ["_dict", "stream_ordering", "outlier"] def __init__(self, internal_metadata_dict: JsonDict): # we have to copy the dict, because it turns out that the same dict is @@ -108,7 +108,10 @@ def __init__(self, internal_metadata_dict: JsonDict): # the stream ordering of this event. None, until it has been persisted. self.stream_ordering = None # type: Optional[int] - outlier = DictProperty("outlier") # type: bool + # whether this event is an outlier (ie, whether we have the state at that point + # in the DAG) + self.outlier = False + out_of_band_membership = DictProperty("out_of_band_membership") # type: bool send_on_behalf_of = DictProperty("send_on_behalf_of") # type: str recheck_redaction = DictProperty("recheck_redaction") # type: bool @@ -129,7 +132,7 @@ def get_dict(self) -> JsonDict: return dict(self._dict) def is_outlier(self) -> bool: - return self._dict.get("outlier", False) + return self.outlier def is_out_of_band_membership(self) -> bool: """Whether this is an out of band membership, like an invite or an invite diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 7ca5c9940a3a..5022e0fcb37a 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -54,6 +54,8 @@ def prune_event(event: EventBase) -> EventBase: event.internal_metadata.stream_ordering ) + pruned_event.internal_metadata.outlier = event.internal_metadata.outlier + # Mark the event as redacted pruned_event.internal_metadata.redacted = True diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 8af53b4f2815..82ea3b895f9a 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -40,6 +40,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): // containing the event "event_format_version": .., // 1,2,3 etc: the event format version "internal_metadata": { .. serialized internal_metadata .. }, + "outlier": true|false, "rejected_reason": .., // The event.rejected_reason field "context": { .. serialized event context .. }, }], @@ -84,6 +85,7 @@ async def _serialize_payload(store, room_id, event_and_contexts, backfilled): "room_version": event.room_version.identifier, "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), + "outlier": event.internal_metadata.is_outlier(), "rejected_reason": event.rejected_reason, "context": serialized_context, } @@ -116,6 +118,7 @@ async def _handle_request(self, request): event = make_event_from_dict( event_dict, room_ver, internal_metadata, rejected_reason ) + event.internal_metadata.outlier = event_payload["outlier"] context = EventContext.deserialize( self.storage, event_payload["context"] diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 8fa104c8d3b4..1fb530f88bda 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -80,6 +80,10 @@ async def _serialize_payload( extra_users (list(UserID)): Any extra users to notify about event """ + # we only expect this interface to be used for local events, so they shouldn't + # be outliers. + assert not event.internal_metadata.is_outlier() + serialized_context = await context.serialize(event, store) payload = { diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cd1ceac50e3a..8d8b40a383ac 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1270,8 +1270,10 @@ def _update_outliers_txn(self, txn, events_and_contexts): logger.exception("") raise + # update the stored internal_metadata to update the "outlier" flag. + # TODO: This is unused as of Synapse 1.30. Remove it once we are happy + # to drop backwards-compatibility with 1.29. metadata_json = json_encoder.encode(event.internal_metadata.get_dict()) - sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?" txn.execute(sql, (metadata_json, event.event_id)) @@ -1319,6 +1321,19 @@ def event_dict(event): d.pop("redacted_because", None) return d + def get_internal_metadata(event): + im = event.internal_metadata.get_dict() + + # temporary hack for database compatibility with Synapse 1.29 and earlier: + # store the `outlier` flag inside the internal_metadata json as well as in + # the `events` table, so that if anyone rolls back to an older Synapse, + # things keep working. This can be removed once we are happy to drop support + # for that + if event.internal_metadata.is_outlier(): + im["outlier"] = True + + return im + self.db_pool.simple_insert_many_txn( txn, table="event_json", @@ -1327,7 +1342,7 @@ def event_dict(event): "event_id": event.event_id, "room_id": event.room_id, "internal_metadata": json_encoder.encode( - event.internal_metadata.get_dict() + get_internal_metadata(event) ), "json": json_encoder.encode(event_dict(event)), "format_version": event.format_version, From b52d4b567bbf318b344ade2ed95801794cf62ab3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <richard@matrix.org> Date: Mon, 15 Feb 2021 18:55:03 +0000 Subject: [PATCH 3/5] changelog --- changelog.d/9411.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9411.misc diff --git a/changelog.d/9411.misc b/changelog.d/9411.misc new file mode 100644 index 000000000000..c3e6cfa5f118 --- /dev/null +++ b/changelog.d/9411.misc @@ -0,0 +1 @@ +Preparatory steps for removing redundant `outlier` data from `event_json.internal_metadata` column. From 4f7c51695c2057d6cfa27202271b701be1138748 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <richard@matrix.org> Date: Tue, 16 Mar 2021 21:50:37 +0000 Subject: [PATCH 4/5] Preserve `outlier` flag via the ReplicationSendEventRestServlet api it turns out we *do* send outliers this way, in the form of locally-generated invite rejections. --- synapse/replication/http/send_event.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 1fb530f88bda..a4c5b4429207 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -40,6 +40,7 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): // containing the event "event_format_version": .., // 1,2,3 etc: the event format version "internal_metadata": { .. serialized internal_metadata .. }, + "outlier": true|false, "rejected_reason": .., // The event.rejected_reason field "context": { .. serialized event context .. }, "requester": { .. serialized requester .. }, @@ -79,11 +80,6 @@ async def _serialize_payload( ratelimit (bool) extra_users (list(UserID)): Any extra users to notify about event """ - - # we only expect this interface to be used for local events, so they shouldn't - # be outliers. - assert not event.internal_metadata.is_outlier() - serialized_context = await context.serialize(event, store) payload = { @@ -91,6 +87,7 @@ async def _serialize_payload( "room_version": event.room_version.identifier, "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), + "outlier": event.internal_metadata.is_outlier(), "rejected_reason": event.rejected_reason, "context": serialized_context, "requester": requester.serialize(), @@ -112,6 +109,7 @@ async def _handle_request(self, request, event_id): event = make_event_from_dict( event_dict, room_ver, internal_metadata, rejected_reason ) + event.internal_metadata.outlier = content["outlier"] requester = Requester.deserialize(self.store, content["requester"]) context = EventContext.deserialize(self.storage, content["context"]) From ca5af01be56af33ea44ca06b5baf2dfc1afc0dbd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <richard@matrix.org> Date: Tue, 16 Mar 2021 21:53:17 +0000 Subject: [PATCH 5/5] Bump version numbers in TODOs this isn't making it into Synapse 1.30. --- synapse/storage/databases/main/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8d8b40a383ac..98dac19a9525 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1271,8 +1271,8 @@ def _update_outliers_txn(self, txn, events_and_contexts): raise # update the stored internal_metadata to update the "outlier" flag. - # TODO: This is unused as of Synapse 1.30. Remove it once we are happy - # to drop backwards-compatibility with 1.29. + # TODO: This is unused as of Synapse 1.31. Remove it once we are happy + # to drop backwards-compatibility with 1.30. metadata_json = json_encoder.encode(event.internal_metadata.get_dict()) sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?" txn.execute(sql, (metadata_json, event.event_id)) @@ -1324,7 +1324,7 @@ def event_dict(event): def get_internal_metadata(event): im = event.internal_metadata.get_dict() - # temporary hack for database compatibility with Synapse 1.29 and earlier: + # temporary hack for database compatibility with Synapse 1.30 and earlier: # store the `outlier` flag inside the internal_metadata json as well as in # the `events` table, so that if anyone rolls back to an older Synapse, # things keep working. This can be removed once we are happy to drop support