From 70f636ec6eb2dce7171898143f2097f27fc7a992 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Jun 2023 01:58:18 -0500 Subject: [PATCH 01/12] Avoid backfill when we already have messages to return Fix https://github.com/matrix-org/synapse/issues/15696 --- synapse/handlers/pagination.py | 126 ++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 32 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index d5257acb7da3..b98c84398fca 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -476,45 +476,35 @@ async def get_messages( room_id, requester, allow_departed_users=True ) - if pagin_config.direction == Direction.BACKWARDS: - # if we're going backwards, we might need to backfill. This - # requires that we have a topo token. - if room_token.topological: - curr_topo = room_token.topological - else: - curr_topo = await self.store.get_current_topological_token( - room_id, room_token.stream - ) - - if not use_admin_priviledge and membership == Membership.LEAVE: - # If they have left the room then clamp the token to be before - # they left the room, to save the effort of loading from the - # database. - - # This is only None if the room is world_readable, in which - # case "JOIN" would have been returned. - assert member_event_id + # If they have left the room then clamp the token to be before + # they left the room, to save the effort of loading from the + # database. + if ( + pagin_config.direction == Direction.BACKWARDS + and not use_admin_priviledge + and membership == Membership.LEAVE + ): + # This is only None if the room is world_readable, in which case + # "Membership.JOIN" would have been returned and we should never hit + # this branch. + assert member_event_id + + leave_token = await self.store.get_topological_token_for_event( + member_event_id + ) + assert leave_token.topological is not None - leave_token = await self.store.get_topological_token_for_event( - member_event_id + if leave_token.topological < curr_topo: + from_token = from_token.copy_and_replace( + StreamKeyType.ROOM, leave_token ) - assert leave_token.topological is not None - - if leave_token.topological < curr_topo: - from_token = from_token.copy_and_replace( - StreamKeyType.ROOM, leave_token - ) - - await self.hs.get_federation_handler().maybe_backfill( - room_id, - curr_topo, - limit=pagin_config.limit, - ) to_room_key = None if pagin_config.to_token: to_room_key = pagin_config.to_token.room_key + # Initially fetch the events from the database. With any luck, we can return + # these without having to backfill anything (handled below). events, next_key = await self.store.paginate_room_events( room_id=room_id, from_key=from_token.room_key, @@ -524,6 +514,78 @@ async def get_messages( event_filter=event_filter, ) + if pagin_config.direction == Direction.BACKWARDS: + event_depths: Set[int] = {event.depth for event in events} + sorted_event_depths = sorted(event_depths) + + found_big_gap = False + number_of_gaps = 0 + previous_event_depth = None + for event_depth in sorted_event_depths: + depth_gap = event_depth - previous_event_depth + if depth_gap > 0: + number_of_gaps += 1 + + # We only tolerate a single-event long gaps in the returned events + # because those are most likely just events we've failed to pull in the + # past. Anything longer than that is probably a sign that we're missing + # a decent chunk of history and we should try to backfill it. + # + # XXX: It's possible we could tolerate longer gaps if we checked that a + # given events `prev_events` is one that has failed pull attempts and we + # could just treat it like a dead branch of history for now or at least + # something that we don't need the block the client on to try pulling. + if event_depth - previous_event_depth > 1: + found_big_gap = True + break + previous_event_depth = event_depth + + # if we're going backwards, we might need to backfill. This + # requires that we have a topo token. + if room_token.topological: + curr_topo = room_token.topological + else: + curr_topo = await self.store.get_current_topological_token( + room_id, room_token.stream + ) + + # Backfill in the foreground if we found a big gap and want to fill in + # that history. + # + # TODO: Should we also consider not enough events to fill the `limit` as + # cause to backfill in the foreground? + if found_big_gap: + did_backfill = ( + await self.hs.get_federation_handler().maybe_backfill( + room_id, + curr_topo, + limit=pagin_config.limit, + ) + ) + + # If we did backfill something, refetch the events from the database to + # catch anything new that might have been added since we last fetched. + if did_backfill: + events, next_key = await self.store.paginate_room_events( + room_id=room_id, + from_key=from_token.room_key, + to_key=to_room_key, + direction=pagin_config.direction, + limit=pagin_config.limit, + event_filter=event_filter, + ) + else: + # Otherwise, we can backfill in the background for eventual + # consistency's sake but we don't need to block the client waiting + # for a costly federation call and processing. + run_as_background_process( + "maybe_backfill_in_the_background", + self.hs.get_federation_handler().maybe_backfill, + room_id, + curr_topo, + limit=pagin_config.limit, + ) + next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key) # if no events are returned from pagination, that implies From 3250dc6fa428fb35edf39f90c4af5706bd47bf6f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Jun 2023 02:06:57 -0500 Subject: [PATCH 02/12] Add changelog --- changelog.d/15737.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15737.feature diff --git a/changelog.d/15737.feature b/changelog.d/15737.feature new file mode 100644 index 000000000000..9a547b5ebdd0 --- /dev/null +++ b/changelog.d/15737.feature @@ -0,0 +1 @@ +Improve `/messages` response time by avoiding backfill when we already have messages to return. From 8c5d9ecd63b3be2b5b8a6a54f990b36062dbb0d7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Jun 2023 16:51:26 -0500 Subject: [PATCH 03/12] Fix None case --- synapse/handlers/pagination.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index b98c84398fca..4c56d6afaa45 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -522,7 +522,11 @@ async def get_messages( number_of_gaps = 0 previous_event_depth = None for event_depth in sorted_event_depths: - depth_gap = event_depth - previous_event_depth + depth_gap = ( + event_depth - previous_event_depth + if previous_event_depth is not None + else 0 + ) if depth_gap > 0: number_of_gaps += 1 @@ -535,7 +539,7 @@ async def get_messages( # given events `prev_events` is one that has failed pull attempts and we # could just treat it like a dead branch of history for now or at least # something that we don't need the block the client on to try pulling. - if event_depth - previous_event_depth > 1: + if depth_gap > 1: found_big_gap = True break previous_event_depth = event_depth From 4de6313a1c56490930145295d944603c19a13c9c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Jun 2023 17:23:25 -0500 Subject: [PATCH 04/12] Fix lints --- synapse/handlers/pagination.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 4c56d6afaa45..9223cad55344 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -476,6 +476,16 @@ async def get_messages( room_id, requester, allow_departed_users=True ) + if pagin_config.direction == Direction.BACKWARDS: + # if we're going backwards, we might need to backfill. This + # requires that we have a topo token. + if room_token.topological: + curr_topo = room_token.topological + else: + curr_topo = await self.store.get_current_topological_token( + room_id, room_token.stream + ) + # If they have left the room then clamp the token to be before # they left the room, to save the effort of loading from the # database. @@ -520,13 +530,9 @@ async def get_messages( found_big_gap = False number_of_gaps = 0 - previous_event_depth = None + previous_event_depth = sorted_event_depths[0] for event_depth in sorted_event_depths: - depth_gap = ( - event_depth - previous_event_depth - if previous_event_depth is not None - else 0 - ) + depth_gap = event_depth - previous_event_depth if depth_gap > 0: number_of_gaps += 1 @@ -544,15 +550,6 @@ async def get_messages( break previous_event_depth = event_depth - # if we're going backwards, we might need to backfill. This - # requires that we have a topo token. - if room_token.topological: - curr_topo = room_token.topological - else: - curr_topo = await self.store.get_current_topological_token( - room_id, room_token.stream - ) - # Backfill in the foreground if we found a big gap and want to fill in # that history. # From cd61e745e729ae44f1efde0c3f3a9c24b3e2979a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Jun 2023 17:27:09 -0500 Subject: [PATCH 05/12] Backfill in the foreground when not enough messages to fill the limit --- synapse/handlers/pagination.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 9223cad55344..62adf762729a 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -514,7 +514,7 @@ async def get_messages( to_room_key = pagin_config.to_token.room_key # Initially fetch the events from the database. With any luck, we can return - # these without having to backfill anything (handled below). + # these without blocking on backfill (handled below). events, next_key = await self.store.paginate_room_events( room_id=room_id, from_key=from_token.room_key, @@ -551,11 +551,9 @@ async def get_messages( previous_event_depth = event_depth # Backfill in the foreground if we found a big gap and want to fill in - # that history. - # - # TODO: Should we also consider not enough events to fill the `limit` as - # cause to backfill in the foreground? - if found_big_gap: + # that history or we don't have enough events to fill the limit that the + # client asked for. + if found_big_gap or len(events) < pagin_config.limit: did_backfill = ( await self.hs.get_federation_handler().maybe_backfill( room_id, From da9421793841b43942e4fed0953bc63fc5423552 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 00:33:36 -0500 Subject: [PATCH 06/12] Fix gap logic --- synapse/handlers/pagination.py | 54 ++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 62adf762729a..f23f4d9032f1 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -40,6 +40,10 @@ logger = logging.getLogger(__name__) +# How many single events we tolerate returning in a `/messages` response before we +# backfill and try to fill in the history +BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 + @attr.s(slots=True, auto_attribs=True) class PurgeStatus: @@ -525,6 +529,9 @@ async def get_messages( ) if pagin_config.direction == Direction.BACKWARDS: + # We use a `Set` because there can be multiple events at a given depth + # and we only care about looking at the unique continum of depths to + # find gaps. event_depths: Set[int] = {event.depth for event in events} sorted_event_depths = sorted(event_depths) @@ -532,28 +539,45 @@ async def get_messages( number_of_gaps = 0 previous_event_depth = sorted_event_depths[0] for event_depth in sorted_event_depths: - depth_gap = event_depth - previous_event_depth - if depth_gap > 0: + # We don't expect a negative depth but we'll just deal with it in + # any case by taking the absolute value to get the true gap between + # any two integers. + depth_gap = abs(event_depth - previous_event_depth) + # A `depth_gap` of 1 is a normal continuous chain to the next event + # (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's + # also possible there is no event at a given depth but we can't ever + # know that for sure) + if depth_gap > 1: number_of_gaps += 1 - # We only tolerate a single-event long gaps in the returned events - # because those are most likely just events we've failed to pull in the - # past. Anything longer than that is probably a sign that we're missing - # a decent chunk of history and we should try to backfill it. + # We only tolerate a small number single-event long gaps in the + # returned events because those are most likely just events we've + # failed to pull in the past. Anything longer than that is probably + # a sign that we're missing a decent chunk of history and we should + # try to backfill it. # - # XXX: It's possible we could tolerate longer gaps if we checked that a - # given events `prev_events` is one that has failed pull attempts and we - # could just treat it like a dead branch of history for now or at least - # something that we don't need the block the client on to try pulling. - if depth_gap > 1: + # XXX: It's possible we could tolerate longer gaps if we checked + # that a given events `prev_events` is one that has failed pull + # attempts and we could just treat it like a dead branch of history + # for now or at least something that we don't need the block the + # client on to try pulling. + if depth_gap > 2: found_big_gap = True break previous_event_depth = event_depth - # Backfill in the foreground if we found a big gap and want to fill in - # that history or we don't have enough events to fill the limit that the - # client asked for. - if found_big_gap or len(events) < pagin_config.limit: + # Backfill in the foreground if we found a big gap, have too many holes, + # or we don't have enough events to fill the limit that the client asked + # for. + missing_too_many_events = ( + number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD + ) + not_enough_events_to_fill_response = len(events) < pagin_config.limit + if ( + found_big_gap + or missing_too_many_events + or not_enough_events_to_fill_response + ): did_backfill = ( await self.hs.get_federation_handler().maybe_backfill( room_id, From fd349c4bf3e009563dee49d28f8e895ef40c2aba Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 00:48:49 -0500 Subject: [PATCH 07/12] Protect from no events --- synapse/handlers/pagination.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index f23f4d9032f1..33195548ec4d 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -537,7 +537,9 @@ async def get_messages( found_big_gap = False number_of_gaps = 0 - previous_event_depth = sorted_event_depths[0] + previous_event_depth = ( + sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0 + ) for event_depth in sorted_event_depths: # We don't expect a negative depth but we'll just deal with it in # any case by taking the absolute value to get the true gap between From 9a0156b455324be294f267bbccce47c6c034c4a6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 01:17:19 -0500 Subject: [PATCH 08/12] Document magic number --- synapse/handlers/pagination.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 33195548ec4d..c708b488ecd7 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -41,7 +41,8 @@ logger = logging.getLogger(__name__) # How many single events we tolerate returning in a `/messages` response before we -# backfill and try to fill in the history +# backfill and try to fill in the history. This is an arbitrarily picked number and may +# need to be tuned in the future. BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 From 7740e8b9eacfdef6e411b2f94f236e3de491574c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 01:19:07 -0500 Subject: [PATCH 09/12] Document MSC3871 gap possibility --- synapse/handlers/pagination.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index c708b488ecd7..e189dff4bda2 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -564,6 +564,10 @@ async def get_messages( # attempts and we could just treat it like a dead branch of history # for now or at least something that we don't need the block the # client on to try pulling. + # + # XXX: If we had something like MSC3871 to indicate gaps in the + # timeline to the client, we could also get away with any sized gap + # and just have the client refetch the holes as they see fit. if depth_gap > 2: found_big_gap = True break From 6c68b727bb3e13520f47782f95c7383a35234418 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 16:56:13 -0500 Subject: [PATCH 10/12] Missed word in comment --- synapse/handlers/pagination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index e189dff4bda2..e7a0ecbb681d 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) -# How many single events we tolerate returning in a `/messages` response before we +# How many single event gaps we tolerate returning in a `/messages` response before we # backfill and try to fill in the history. This is an arbitrarily picked number and may # need to be tuned in the future. BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 From 000923a127f9b734ed25c99f87e34b33fe54c4c3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 16:56:57 -0500 Subject: [PATCH 11/12] Better tune wording --- synapse/handlers/pagination.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index e7a0ecbb681d..faf137bb832c 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -41,8 +41,8 @@ logger = logging.getLogger(__name__) # How many single event gaps we tolerate returning in a `/messages` response before we -# backfill and try to fill in the history. This is an arbitrarily picked number and may -# need to be tuned in the future. +# backfill and try to fill in the history. This is an arbitrarily picked number so feel +# free to tune it in the future. BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 From db68fcce5348344ca3464f5deab1c3602626b91f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jun 2023 17:02:15 -0500 Subject: [PATCH 12/12] Comment what the loop is for --- synapse/handlers/pagination.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index faf137bb832c..19b8728db9b9 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -536,6 +536,7 @@ async def get_messages( event_depths: Set[int] = {event.depth for event in events} sorted_event_depths = sorted(event_depths) + # Inspect the depths of the returned events to see if there are any gaps found_big_gap = False number_of_gaps = 0 previous_event_depth = (