diff --git a/changelog.d/9383.feature b/changelog.d/9383.feature new file mode 100644 index 000000000000..8957c9cc5e4c --- /dev/null +++ b/changelog.d/9383.feature @@ -0,0 +1 @@ +Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index fbbf71edd9fc..c5a5bec4b226 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2542,9 +2542,14 @@ spam_checker: # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # +# 'prefer_local_users' defines whether to prioritise local users in +# search query results. If True, local users are more likely to appear above +# remote users when searching the user directory. Defaults to false. +# #user_directory: # enabled: true # search_all_users: false +# prefer_local_users: false # User Consent configuration diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index c8d19c5d6b4e..89dbebd1480b 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -26,6 +26,7 @@ class UserDirectoryConfig(Config): def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False + self.user_directory_search_prefer_local_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_search_enabled = user_directory_config.get( @@ -34,6 +35,9 @@ def read_config(self, config, **kwargs): self.user_directory_search_all_users = user_directory_config.get( "search_all_users", False ) + self.user_directory_search_prefer_local_users = user_directory_config.get( + "prefer_local_users", False + ) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ @@ -49,7 +53,12 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # + # 'prefer_local_users' defines whether to prioritise local users in + # search query results. If True, local users are more likely to appear above + # remote users when searching the user directory. Defaults to false. + # #user_directory: # enabled: true # search_all_users: false + # prefer_local_users: false """ diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7b9729da0958..467738285ff5 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -336,8 +336,7 @@ def _get_next_batch(txn): return len(users_to_work_on) async def is_room_world_readable_or_publicly_joinable(self, room_id): - """Check if the room is either world_readable or publically joinable - """ + """Check if the room is either world_readable or publically joinable""" # Create a state filter that only queries join and history state event types_to_filter = ( @@ -516,8 +515,7 @@ async def add_users_in_public_rooms( ) async def delete_all_from_user_dir(self) -> None: - """Delete the entire user directory - """ + """Delete the entire user directory""" def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") @@ -558,6 +556,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self._prefer_local_users_in_search = ( + hs.config.user_directory_search_prefer_local_users + ) + self._server_name = hs.config.server_name + async def remove_from_user_dir(self, user_id: str) -> None: def _remove_from_user_dir_txn(txn): self.db_pool.simple_delete_txn( @@ -750,9 +753,24 @@ async def search_user_dir(self, user_id, search_term, limit): ) """ + # We allow manipulating the ranking algorithm by injecting statements + # based on config options. + additional_ordering_statements = [] + ordering_arguments = () + if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)" + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + # We order by rank and then if they have profile info # The ranking algorithm is hand tweaked for "best" results. Broadly # the idea is we give a higher weight to exact matches. @@ -763,7 +781,7 @@ async def search_user_dir(self, user_id, search_term, limit): FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND vector @@ to_tsquery('simple', ?) ORDER BY (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -783,33 +801,54 @@ async def search_user_dir(self, user_id, search_term, limit): 8 ) ) + %(order_case_statements)s DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, + """ % { + "where_clause": where_clause, + "order_case_statements": " ".join(additional_ordering_statements), + } + args = ( + join_args + + (full_query, exact_query, prefix_query) + + ordering_arguments + + (limit + 1,) ) - args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + # + # Note that we need to include a comma at the end for valid SQL + statement = "user_id LIKE ? DESC," + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, + %(order_statements)s display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, - ) - args = join_args + (search_query, limit + 1) + """ % { + "where_clause": where_clause, + "order_statements": " ".join(additional_ordering_statements), + } + args = join_args + (search_query,) + ordering_arguments + (limit + 1,) else: # This should be unreachable. raise Exception("Unrecognized database engine") diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 9c886d671a1b..ef5e9cd3afc3 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -18,6 +18,7 @@ import synapse.rest.admin from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes +from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import user_directory from synapse.storage.roommember import ProfileInfo @@ -46,6 +47,8 @@ def make_homeserver(self, reactor, clock): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() self.handler = hs.get_user_directory_handler() + self.event_builder_factory = self.hs.get_event_builder_factory() + self.event_creation_handler = self.hs.get_event_creation_handler() def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" @@ -541,6 +544,97 @@ def test_initial_share_all_users(self): s = self.get_success(self.handler.search_users(u1, u4, 10)) self.assertEqual(len(s["results"]), 1) + @override_config( + { + "user_directory": { + "enabled": True, + "search_all_users": True, + "prefer_local_users": True, + } + } + ) + def test_prefer_local_users(self): + """Tests that local users are shown higher in search results when + user_directory.prefer_local_users is True. + """ + # Create a room and few users to test the directory with + searching_user = self.register_user("searcher", "password") + searching_user_tok = self.login("searcher", "password") + + room_id = self.helper.create_room_as( + searching_user, + room_version=RoomVersions.V1.identifier, + tok=searching_user_tok, + ) + + # Create a few local users and join them to the room + local_user_1 = self.register_user("user_xxxxx", "password") + local_user_2 = self.register_user("user_bbbbb", "password") + local_user_3 = self.register_user("user_zzzzz", "password") + + self._add_user_to_room(room_id, RoomVersions.V1, local_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_3) + + # Create a few "remote" users and join them to the room + remote_user_1 = "@user_aaaaa:remote_server" + remote_user_2 = "@user_yyyyy:remote_server" + remote_user_3 = "@user_ccccc:remote_server" + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3) + + local_users = [local_user_1, local_user_2, local_user_3] + remote_users = [remote_user_1, remote_user_2, remote_user_3] + + # Populate the user directory via background update + self._add_background_updates() + while not self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ): + self.get_success( + self.store.db_pool.updates.do_next_background_update(100), by=0.1 + ) + + # The local searching user searches for the term "user", which other users have + # in their user id + results = self.get_success( + self.handler.search_users(searching_user, "user", 20) + )["results"] + received_user_id_ordering = [result["user_id"] for result in results] + + # Typically we'd expect Synapse to return users in lexicographical order, + # assuming they have similar User IDs/display names, and profile information. + + # Check that the order of returned results using our module is as we expect, + # i.e our local users show up first, despite all users having lexographically mixed + # user IDs. + [self.assertIn(user, local_users) for user in received_user_id_ordering[:3]] + [self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]] + + def _add_user_to_room( + self, room_id: str, room_version: RoomVersion, user_id: str, + ): + # Add a user to the room. + builder = self.event_builder_factory.for_room_version( + room_version, + { + "type": "m.room.member", + "sender": user_id, + "state_key": user_id, + "room_id": room_id, + "content": {"membership": "join"}, + }, + ) + + event, context = self.get_success( + self.event_creation_handler.create_new_client_event(builder) + ) + + self.get_success( + self.hs.get_storage().persistence.persist_event(event, context) + ) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): user_id = "@test:test"