From 88f3559fe880124ea7a46a2d6cd8f6e9cd2a8790 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 13:34:43 +0100 Subject: [PATCH 1/8] Extract `_resolve_state_at_missing_prevs` method This was done with Pycharm's "extract method" --- synapse/handlers/federation.py | 195 ++++++++++++++++----------------- 1 file changed, 97 insertions(+), 98 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 529d025c39b5..51f8e4776c33 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -352,106 +352,10 @@ async def on_receive_pdu( ) else: - # We don't have all of the prev_events for this event. - # - # In this case, we need to fall back to asking another server in the - # federation for the state at this event. That's ok provided we then - # resolve the state against other bits of the DAG before using it (which - # will ensure that you can't just take over a room by sending an event, - # withholding its prev_events, and declaring yourself to be an admin in - # the subsequent state request). - # - # Since we're pulling this event as a missing prev_event, then clearly - # this event is not going to become the only forward-extremity and we are - # guaranteed to resolve its state against our existing forward - # extremities, so that should be fine. - # - # XXX this really feels like it could/should be merged with the above, - # but there is an interaction with min_depth that I'm not really - # following. - logger.info( - "Event %s is missing prev_events %s: calculating state for a " - "backwards extremity", - event_id, - shortstr(missing_prevs), + state = await self._resolve_state_at_missing_prevs( + origin, pdu, room_id, event_id, missing_prevs, seen ) - # Calculate the state after each of the previous events, and - # resolve them to find the correct state at the current event. - event_map = {event_id: pdu} - try: - # Get the state of the events we know about - ours = await self.state_store.get_state_groups_ids( - room_id, seen - ) - - # state_maps is a list of mappings from (type, state_key) to event_id - state_maps: List[StateMap[str]] = list(ours.values()) - - # we don't need this any more, let's delete it. - del ours - - # Ask the remote server for the states we don't - # know about - for p in missing_prevs: - logger.info( - "Requesting state after missing prev_event %s", p - ) - - with nested_logging_context(p): - # note that if any of the missing prevs share missing state or - # auth events, the requests to fetch those events are deduped - # by the get_pdu_cache in federation_client. - remote_state = ( - await self._get_state_after_missing_prev_event( - origin, room_id, p - ) - ) - - remote_state_map = { - (x.type, x.state_key): x.event_id - for x in remote_state - } - state_maps.append(remote_state_map) - - for x in remote_state: - event_map[x.event_id] = x - - room_version = await self.store.get_room_version_id(room_id) - state_map = await self._state_resolution_handler.resolve_events_with_store( - room_id, - room_version, - state_maps, - event_map, - state_res_store=StateResolutionStore(self.store), - ) - - # We need to give _process_received_pdu the actual state events - # rather than event ids, so generate that now. - - # First though we need to fetch all the events that are in - # state_map, so we can build up the state below. - evs = await self.store.get_events( - list(state_map.values()), - get_prev_content=False, - redact_behaviour=EventRedactBehaviour.AS_IS, - ) - event_map.update(evs) - - state = [event_map[e] for e in state_map.values()] - except Exception: - logger.warning( - "Error attempting to resolve state at missing " - "prev_events", - exc_info=True, - ) - raise FederationError( - "ERROR", - 403, - "We can't get valid state history.", - affected=event_id, - ) - # A second round of checks for all events. Check that the event passes auth # based on `auth_events`, this allows us to assert that the event would # have been allowed at some point. If an event passes this check its OK @@ -1493,6 +1397,101 @@ async def get_event(event_id: str): event_infos, ) + async def _resolve_state_at_missing_prevs( + self, dest, event, room_id, event_id, missing_prevs, seen + ): + # We don't have all of the prev_events for this event. + # + # In this case, we need to fall back to asking another server in the + # federation for the state at this event. That's ok provided we then + # resolve the state against other bits of the DAG before using it (which + # will ensure that you can't just take over a room by sending an event, + # withholding its prev_events, and declaring yourself to be an admin in + # the subsequent state request). + # + # Since we're pulling this event as a missing prev_event, then clearly + # this event is not going to become the only forward-extremity and we are + # guaranteed to resolve its state against our existing forward + # extremities, so that should be fine. + # + # XXX this really feels like it could/should be merged with the above, + # but there is an interaction with min_depth that I'm not really + # following. + logger.info( + "Event %s is missing prev_events %s: calculating state for a " + "backwards extremity", + event_id, + shortstr(missing_prevs), + ) + # Calculate the state after each of the previous events, and + # resolve them to find the correct state at the current event. + event_map = {event_id: event} + try: + # Get the state of the events we know about + ours = await self.state_store.get_state_groups_ids(room_id, seen) + + # state_maps is a list of mappings from (type, state_key) to event_id + state_maps: List[StateMap[str]] = list(ours.values()) + + # we don't need this any more, let's delete it. + del ours + + # Ask the remote server for the states we don't + # know about + for p in missing_prevs: + logger.info("Requesting state after missing prev_event %s", p) + + with nested_logging_context(p): + # note that if any of the missing prevs share missing state or + # auth events, the requests to fetch those events are deduped + # by the get_pdu_cache in federation_client. + remote_state = await self._get_state_after_missing_prev_event( + dest, room_id, p + ) + + remote_state_map = { + (x.type, x.state_key): x.event_id for x in remote_state + } + state_maps.append(remote_state_map) + + for x in remote_state: + event_map[x.event_id] = x + + room_version = await self.store.get_room_version_id(room_id) + state_map = await self._state_resolution_handler.resolve_events_with_store( + room_id, + room_version, + state_maps, + event_map, + state_res_store=StateResolutionStore(self.store), + ) + + # We need to give _process_received_pdu the actual state events + # rather than event ids, so generate that now. + + # First though we need to fetch all the events that are in + # state_map, so we can build up the state below. + evs = await self.store.get_events( + list(state_map.values()), + get_prev_content=False, + redact_behaviour=EventRedactBehaviour.AS_IS, + ) + event_map.update(evs) + + state = [event_map[e] for e in state_map.values()] + except Exception: + logger.warning( + "Error attempting to resolve state at missing " "prev_events", + exc_info=True, + ) + raise FederationError( + "ERROR", + 403, + "We can't get valid state history.", + affected=event_id, + ) + return state + def _sanity_check_event(self, ev: EventBase) -> None: """ Do some early sanity checks of a received event From bd35f2a2eb1218b20e9b315a2afe2ff917b00a4f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 13:47:34 +0100 Subject: [PATCH 2/8] remove dead comment --- synapse/handlers/federation.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 51f8e4776c33..5ff07288e352 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1413,10 +1413,6 @@ async def _resolve_state_at_missing_prevs( # this event is not going to become the only forward-extremity and we are # guaranteed to resolve its state against our existing forward # extremities, so that should be fine. - # - # XXX this really feels like it could/should be merged with the above, - # but there is an interaction with min_depth that I'm not really - # following. logger.info( "Event %s is missing prev_events %s: calculating state for a " "backwards extremity", From 85e5df872a4a3dd7d9e5b087e05d2a155027b4d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 13:49:34 +0100 Subject: [PATCH 3/8] Remove some parameters from `_resolve_state_at_missing_prevs` Most of these parameters are easy to calculate. --- synapse/handlers/federation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5ff07288e352..86b36dbb1172 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -352,9 +352,7 @@ async def on_receive_pdu( ) else: - state = await self._resolve_state_at_missing_prevs( - origin, pdu, room_id, event_id, missing_prevs, seen - ) + state = await self._resolve_state_at_missing_prevs(origin, pdu) # A second round of checks for all events. Check that the event passes auth # based on `auth_events`, this allows us to assert that the event would @@ -1397,9 +1395,14 @@ async def get_event(event_id: str): event_infos, ) - async def _resolve_state_at_missing_prevs( - self, dest, event, room_id, event_id, missing_prevs, seen - ): + async def _resolve_state_at_missing_prevs(self, dest, event): + room_id = event.room_id + event_id = event.event_id + + prevs = set(event.prev_event_ids()) + seen = await self.store.have_events_in_timeline(prevs) + missing_prevs = prevs - seen + # We don't have all of the prev_events for this event. # # In this case, we need to fall back to asking another server in the From 2b487ded69f3a53d56e78da2ef8b75d5e71e53af Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 13:41:36 +0100 Subject: [PATCH 4/8] Reorder `sent_to_us_directly`/`missing_prevs` conditions These should be equivalent --- synapse/handlers/federation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 86b36dbb1172..5305139f48ab 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -289,8 +289,8 @@ async def on_receive_pdu( seen = await self.store.have_events_in_timeline(prevs) missing_prevs = prevs - seen - if missing_prevs: - if sent_to_us_directly: + if sent_to_us_directly: + if missing_prevs: # We only backfill backwards to the min depth. min_depth = await self.get_min_depth_for_context(pdu.room_id) logger.debug("min_depth: %d", min_depth) @@ -351,7 +351,8 @@ async def on_receive_pdu( affected=pdu.event_id, ) - else: + else: + if missing_prevs: state = await self._resolve_state_at_missing_prevs(origin, pdu) # A second round of checks for all events. Check that the event passes auth From cfb347a3eea0b7c1849a2d0d2ba926b5b7e0e99f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 13:42:39 +0100 Subject: [PATCH 5/8] Push down `if not missing_prevs` condition --- synapse/handlers/federation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5305139f48ab..bb16e295a3d8 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -352,8 +352,7 @@ async def on_receive_pdu( ) else: - if missing_prevs: - state = await self._resolve_state_at_missing_prevs(origin, pdu) + state = await self._resolve_state_at_missing_prevs(origin, pdu) # A second round of checks for all events. Check that the event passes auth # based on `auth_events`, this allows us to assert that the event would @@ -1404,6 +1403,9 @@ async def _resolve_state_at_missing_prevs(self, dest, event): seen = await self.store.have_events_in_timeline(prevs) missing_prevs = prevs - seen + if not missing_prevs: + return None + # We don't have all of the prev_events for this event. # # In this case, we need to fall back to asking another server in the From 9885bdfb729be8bcbab0f611cdb8a4de16efcf0c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 14:07:38 +0100 Subject: [PATCH 6/8] Type annotations and docstrings for `_resolve_state_at_missing_prevs` --- synapse/handlers/federation.py | 48 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bb16e295a3d8..3d275a0c882b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1395,7 +1395,38 @@ async def get_event(event_id: str): event_infos, ) - async def _resolve_state_at_missing_prevs(self, dest, event): + async def _resolve_state_at_missing_prevs( + self, dest: str, event: EventBase + ) -> Optional[Iterable[EventBase]]: + """Calculate the state at an event with missing prev_events. + + This is used when we have pulled a batch of events from a remote server, and + still don't have all the prev_events. + + If we already have all the prev_events for `event`, this method does nothing. + + Otherwise, the missing prevs become new backwards extremities, and we fall back + to asking the remote server for the state after each missing `prev_event`, + and resolving across them. + + That's ok provided we then resolve the state against other bits of the DAG + before using it - in other words, that the received event `event` is not going + to become the only forwards_extremity in the room (which will ensure that you + can't just take over a room by sending an event, withholding its prev_events, + and declaring yourself to be an admin in the subsequent state request). + + In other words: we should only call this method if `event` has been *pulled* + as part of a batch of missing prev events, or similar. + + Params: + dest: the remote server to ask for state at the missing prevs. Typically, + this will be the server we got `event` from. + event: an event to check for missing prevs. + + Returns: + if we already had all the prev events, `None`. Otherwise, returns a list of + the events in the state at `event`. + """ room_id = event.room_id event_id = event.event_id @@ -1406,19 +1437,6 @@ async def _resolve_state_at_missing_prevs(self, dest, event): if not missing_prevs: return None - # We don't have all of the prev_events for this event. - # - # In this case, we need to fall back to asking another server in the - # federation for the state at this event. That's ok provided we then - # resolve the state against other bits of the DAG before using it (which - # will ensure that you can't just take over a room by sending an event, - # withholding its prev_events, and declaring yourself to be an admin in - # the subsequent state request). - # - # Since we're pulling this event as a missing prev_event, then clearly - # this event is not going to become the only forward-extremity and we are - # guaranteed to resolve its state against our existing forward - # extremities, so that should be fine. logger.info( "Event %s is missing prev_events %s: calculating state for a " "backwards extremity", @@ -1483,7 +1501,7 @@ async def _resolve_state_at_missing_prevs(self, dest, event): state = [event_map[e] for e in state_map.values()] except Exception: logger.warning( - "Error attempting to resolve state at missing " "prev_events", + "Error attempting to resolve state at missing prev_events", exc_info=True, ) raise FederationError( From 67ea6b4604e388ee0a8ddb8a1447e9bb49e8c8d8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 14:10:06 +0100 Subject: [PATCH 7/8] Push down `missing_prevs` calculation No point calculating this unless we need it. --- synapse/handlers/federation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3d275a0c882b..de86918b7bc0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -285,11 +285,11 @@ async def on_receive_pdu( # - Fetching any missing prev events to fill in gaps in the graph # - Fetching state if we have a hole in the graph if not pdu.internal_metadata.is_outlier(): - prevs = set(pdu.prev_event_ids()) - seen = await self.store.have_events_in_timeline(prevs) - missing_prevs = prevs - seen - if sent_to_us_directly: + prevs = set(pdu.prev_event_ids()) + seen = await self.store.have_events_in_timeline(prevs) + missing_prevs = prevs - seen + if missing_prevs: # We only backfill backwards to the min depth. min_depth = await self.get_min_depth_for_context(pdu.room_id) From 177a8ad5fc84e36ec3c1f273d99babca853a1b01 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Aug 2021 14:11:01 +0100 Subject: [PATCH 8/8] changelog --- changelog.d/10624.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10624.misc diff --git a/changelog.d/10624.misc b/changelog.d/10624.misc new file mode 100644 index 000000000000..9a765435dbe4 --- /dev/null +++ b/changelog.d/10624.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity.