-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add all local users to the user_directory and optionally search them #2723
Changes from 8 commits
47d99a2
3241c7a
cd3697e
a4bb133
4b1fceb
f61e107
5406392
f397153
1bd40ca
74e0cc7
95f8a71
7b86c1f
b11dca2
c22e732
cdc2cb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# -*- coding: utf-8 -*- | ||
# Copyright 2017 New Vector Ltd | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from ._base import Config | ||
|
||
|
||
class UserDirectoryConfig(Config): | ||
"""User Directory Configuration | ||
Configuration for the behaviour of the /user_directory API | ||
""" | ||
|
||
def read_config(self, config): | ||
self.user_directory_include_pattern = None | ||
user_directory_config = config.get("user_directory", None) | ||
if user_directory_config: | ||
self.user_directory_include_pattern = ( | ||
user_directory_config.get("include_pattern", None) | ||
) | ||
|
||
def default_config(self, config_dir_path, server_name, **kwargs): | ||
return """ | ||
# User Directory configuration | ||
# 'include_pattern' defines an optional SQL LIKE pattern when querying the | ||
# user directory in addition to publicly visible users. Defaults to None. | ||
# | ||
#user_directory: | ||
# include_pattern: "%%:%s" | ||
""" % (server_name) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,12 +20,13 @@ | |
from synapse.storage.roommember import ProfileInfo | ||
from synapse.util.metrics import Measure | ||
from synapse.util.async import sleep | ||
from synapse.types import get_localpart_from_id | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class UserDirectoyHandler(object): | ||
class UserDirectoryHandler(object): | ||
"""Handles querying of and keeping updated the user_directory. | ||
|
||
N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY | ||
|
@@ -53,6 +54,7 @@ def __init__(self, hs): | |
self.notifier = hs.get_notifier() | ||
self.is_mine_id = hs.is_mine_id | ||
self.update_user_directory = hs.config.update_user_directory | ||
self.include_pattern = hs.config.user_directory_include_pattern | ||
|
||
# When start up for the first time we need to populate the user_directory. | ||
# This is a set of user_id's we've inserted already | ||
|
@@ -110,6 +112,15 @@ def notify_new_event(self): | |
finally: | ||
self._is_processing = False | ||
|
||
@defer.inlineCallbacks | ||
def handle_local_profile_change(self, user_id, profile): | ||
"""Called to update index of our local user profiles when they change | ||
irrespective of any rooms the user may be in. | ||
""" | ||
yield self.store.update_profile_in_user_dir( | ||
user_id, profile.display_name, profile.avatar_url, None, | ||
) | ||
|
||
@defer.inlineCallbacks | ||
def _unsafe_process(self): | ||
# If self.pos is None then means we haven't fetched it from DB | ||
|
@@ -148,16 +159,30 @@ def _do_initial_spam(self): | |
room_ids = yield self.store.get_all_rooms() | ||
|
||
logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) | ||
num_processed_rooms = 1 | ||
num_processed_rooms = 0 | ||
|
||
for room_id in room_ids: | ||
logger.info("Handling room %d/%d", num_processed_rooms, len(room_ids)) | ||
logger.info("Handling room %d/%d", num_processed_rooms+1, len(room_ids)) | ||
yield self._handle_initial_room(room_id) | ||
num_processed_rooms += 1 | ||
yield sleep(self.INITIAL_SLEEP_MS / 1000.) | ||
|
||
logger.info("Processed all rooms.") | ||
|
||
if self.include_pattern: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact, I would be tempted to do this all the time and do the checks when we filter. Otherwise we need to keep track of people toggling the option on and off There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i deliberately wasn't doing this (and instead thought the user can blow away the user_dir caches if they change the option), as for bigger servers it's going to make the user_directory and user_directory_search tables enormous, and slow down the FTS for a feature which may not even be being used... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that we add users to the table when they are in a room with anyone else, wouldn't the vast majority of users already be in there? |
||
num_processed_users = 0 | ||
user_ids = yield self.store.get_all_local_users() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to include appservice users here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm also worried that pulling out millions of users is going to be painful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And its going to take matrix.org more than a day on the default settings to get through all its users There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm, point. so your suggestion is to batch? and yes, we probably should be including AS users. |
||
logger.info("Doing initial update of user directory. %d users", len(user_ids)) | ||
for user_id in user_ids: | ||
# We add profiles for all users even if they don't match the | ||
# include pattern, just in case we want to change it in future | ||
logger.info("Handling user %d/%d", num_processed_users+1, len(user_ids)) | ||
yield self._handle_local_user(user_id) | ||
num_processed_users += 1 | ||
yield sleep(self.INITIAL_SLEEP_MS / 1000.) | ||
|
||
logger.info("Processed all users") | ||
|
||
self.initially_handled_users = None | ||
self.initially_handled_users_in_public = None | ||
self.initially_handled_users_share = None | ||
|
@@ -384,15 +409,30 @@ def _handle_room_publicity_change(self, room_id, prev_event_id, event_id, typ): | |
for user_id in users: | ||
yield self._handle_remove_user(room_id, user_id) | ||
|
||
@defer.inlineCallbacks | ||
def _handle_local_user(self, user_id): | ||
"""Adds a new local roomless user into the user_directory_search table. | ||
Used to populate up the user index when we have an | ||
user_directory_include_pattern specified. | ||
""" | ||
logger.debug("Adding new local user to dir, %r", user_id) | ||
|
||
profile = yield self.store.get_profileinfo(get_localpart_from_id(user_id)) | ||
|
||
row = yield self.store.get_user_in_directory(user_id) | ||
if not row: | ||
yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) | ||
|
||
|
||
@defer.inlineCallbacks | ||
def _handle_new_user(self, room_id, user_id, profile): | ||
"""Called when we might need to add user to directory | ||
|
||
Args: | ||
room_id (str): room_id that user joined or started being public that | ||
room_id (str): room_id that user joined or started being public | ||
user_id (str) | ||
""" | ||
logger.debug("Adding user to dir, %r", user_id) | ||
logger.debug("Adding new user to dir, %r", user_id) | ||
|
||
row = yield self.store.get_user_in_directory(user_id) | ||
if not row: | ||
|
@@ -407,7 +447,7 @@ def _handle_new_user(self, room_id, user_id, profile): | |
if not row: | ||
yield self.store.add_users_to_public_room(room_id, [user_id]) | ||
else: | ||
logger.debug("Not adding user to public dir, %r", user_id) | ||
logger.debug("Not adding new user to public dir, %r", user_id) | ||
|
||
# Now we update users who share rooms with users. We do this by getting | ||
# all the current users in the room and seeing which aren't already | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,9 @@ | |
|
||
from twisted.internet import defer | ||
|
||
from synapse.storage.roommember import ProfileInfo | ||
from synapse.api.errors import StoreError | ||
|
||
from ._base import SQLBaseStore | ||
|
||
|
||
|
@@ -26,6 +29,30 @@ def create_profile(self, user_localpart): | |
desc="create_profile", | ||
) | ||
|
||
@defer.inlineCallbacks | ||
def get_profileinfo(self, user_localpart): | ||
try: | ||
profile = yield self._simple_select_one( | ||
table="profiles", | ||
keyvalues={"user_id": user_localpart}, | ||
retcols=("displayname", "avatar_url"), | ||
desc="get_profileinfo", | ||
) | ||
except StoreError, e: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should really be using |
||
if e.code == 404: | ||
# no match | ||
defer.returnValue(ProfileInfo(None, None)) | ||
return | ||
else: | ||
raise | ||
|
||
defer.returnValue( | ||
ProfileInfo( | ||
avatar_url=profile['avatar_url'], | ||
display_name=profile['displayname'], | ||
) | ||
) | ||
|
||
def get_profile_displayname(self, user_localpart): | ||
return self._simple_select_one_onecol( | ||
table="profiles", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* Copyright 2017 New Vector Ltd | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
-- change the user_directory table to also cover global local user profiles | ||
-- rather than just profiles within specific rooms. | ||
|
||
CREATE TABLE user_directory2 ( | ||
user_id TEXT NOT NULL, | ||
room_id TEXT, | ||
display_name TEXT, | ||
avatar_url TEXT | ||
); | ||
|
||
INSERT INTO user_directory2(user_id, room_id, display_name, avatar_url) | ||
SELECT user_id, room_id, display_name, avatar_url from user_directory; | ||
|
||
DROP TABLE user_directory; | ||
ALTER TABLE user_directory2 RENAME TO user_directory; | ||
|
||
-- create indexes after doing the inserts because that's more efficient. | ||
-- it also means we can give it the same name as the old one without renaming. | ||
CREATE INDEX user_directory_room_idx ON user_directory(room_id); | ||
CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would move the if and fetching of profile into
handle_local_profile_change
rather than duplicating it everywhereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mm, point.