Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add metrics to track /messages response time by room size #13545

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13533.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Track HTTP response times over 10 seconds from `/messages` (`synapse_room_message_list_rest_servlet_response_time_seconds`).
1 change: 1 addition & 0 deletions changelog.d/13545.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics to track `/messages` response time by room size.
79 changes: 78 additions & 1 deletion synapse/rest/client/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
""" This module contains REST servlets to do with rooms: /rooms/<paths> """
import logging
import re
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from enum import Enum
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple, Type, TypeVar
from urllib import parse as urlparse

from prometheus_client.core import Histogram

from twisted.web.server import Request

from synapse import event_auth
Expand Down Expand Up @@ -61,6 +64,69 @@
logger = logging.getLogger(__name__)


# Type variable used to annotate `cls` in `_RoomSize.from_member_count`.
T_RoomSize = TypeVar("T_RoomSize", bound="_RoomSize")


class _RoomSize(Enum):
    """
    Coarse size categories for a room, based on its member count.

    The member count is a pretty good approximation of how hard it will be to
    get events in the room. Room "complexity" would be another possible
    measure.
    """

    # A room in this category is not necessarily a DM; it just has a DM-like
    # number of people in it.
    DM_SIZE = "direct_message_size"
    SMALL = "small"
    SUBSTANTIAL = "substantial"
    LARGE = "large"

    @classmethod
    def from_member_count(cls: Type[T_RoomSize], member_count: int) -> "_RoomSize":
        """
        Pick the size category for a room with `member_count` members.

        Args:
            member_count: The number of members in the room.

        Returns:
            `DM_SIZE` for up to 2 members, `SMALL` below 100, `SUBSTANTIAL`
            below 1000 and `LARGE` for anything else.
        """
        # Each guard handles one category; falling through means the count was
        # too large for it.
        if member_count <= 2:
            return _RoomSize.DM_SIZE
        if member_count < 100:
            return _RoomSize.SMALL
        if member_count < 1000:
            return _RoomSize.SUBSTANTIAL
        return _RoomSize.LARGE


# A dedicated histogram for `/messages` response times. It times the same sort
# of thing as `synapse_http_server_response_time_seconds`, but its buckets go
# beyond 10s so slower responses remain visible. The extra buckets live on
# this separate metric rather than the general one because the general one is
# multiplied across hundreds of servlets and we don't want to increase its
# cardinality.
messsages_response_timer = Histogram(
    name="synapse_room_message_list_rest_servlet_response_time_seconds",
    documentation="sec",
    # Labelled by room size to give a more realistic picture of `/messages`
    # response time for bigger rooms: tiny rooms that can always respond fast
    # would otherwise skew the results when optimizing the bigger cases.
    labelnames=["room_size"],
    buckets=(
        # Sub-second responses.
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5,
        # One to ten seconds.
        1.0, 2.5, 5.0, 10.0,
        # Beyond ten seconds.
        30.0, 60.0, 120.0, 180.0,
        "+Inf",
    ),
)


class TransactionRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
super().__init__()
Expand Down Expand Up @@ -556,13 +622,18 @@ class RoomMessageListRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
    """Keep references to the homeserver and the helpers obtained from it.

    Args:
        hs: The homeserver from which the clock, pagination handler, auth
            object and main datastore are fetched.
    """
    super().__init__()
    self._hs = hs
    # Used by `on_GET` to timestamp the start and end of request processing.
    self.clock = hs.get_clock()
    self.pagination_handler = hs.get_pagination_handler()
    # Used by `on_GET` to identify the requester.
    self.auth = hs.get_auth()
    # Used by `on_GET` to look up the number of joined users in the room.
    self.store = hs.get_datastores().main

async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
processing_start_time = self.clock.time_msec()
# Fire and forget and hope that we get a result by the end.
room_member_count_co = self.store.get_number_joined_users_in_room(room_id)

requester = await self.auth.get_user_by_req(request, allow_guest=True)
pagination_config = await PaginationConfig.from_request(
self.store, request, default_limit=10
Expand Down Expand Up @@ -593,6 +664,12 @@ async def on_GET(
event_filter=event_filter,
)

processing_end_time = self.clock.time_msec()
room_member_count = await room_member_count_co
messsages_response_timer.labels(
room_size=_RoomSize.from_member_count(room_member_count)
).observe((processing_start_time - processing_end_time) / 1000)

return 200, msgs


Expand Down