From df3a661e4adb7676682a5e3c298a2dfda18b08a1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 10:04:47 +0000 Subject: [PATCH 1/8] Search for messages across predecessor rooms Signed-off-by: Andrew Morgan --- synapse/api/filtering.py | 3 ++ synapse/handlers/search.py | 69 ++++++++++++++++++++++++++++++++++++++ synapse/storage/state.py | 1 + 3 files changed, 73 insertions(+) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 16ad65486480..84000e64228a 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,6 +444,9 @@ def lazy_load_members(self): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def add_room_ids(self, room_ids): + self.rooms += room_ids + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ec936bbb4e1f..77e7e4e0fb9b 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -37,6 +37,54 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms in then returned. + + Args: + room_id (str): The ID of the room to search through. + + Returns: + dict of past room IDs as strings + """ + + historical_room_ids = [] + + while True: + state_ids = yield self.store.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + break + + # Retrieve the room's create event + create_event = yield self.store.get_event(create_id) + + if not create_event: + break + + # Check if a predecessor room is present + predecessor = create_event.content.get("predecessor", None) + if not predecessor: + break + + # Add predecessor's room ID + historical_room_id = predecessor["room_id"] + historical_room_ids.append(historical_room_id) + + # Scan through the old room for further predecessors + room_id = historical_room_id + + defer.returnValue(historical_room_ids) + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user. @@ -139,6 +187,27 @@ def search(self, user, content, batch=None): room_ids = search_filter.filter_rooms(room_ids) + # If doing a subset of all rooms seearch, check if any of the rooms + # are from an upgraded room, and search their contents as well + # XXX: There is the possibility that we don't have a create event for + # the room in question, in which case we can't return all the results + # we want to. + # Ideally we would just return the results we can get now, and + # try to get more results from other servers in the background. + if search_filter.rooms: + historical_room_ids = [] + for room_id in room_ids: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Add any found rooms to the list to search + for historical_room_id in historical_room_ids: + room_ids.add(historical_room_id) + + # Prevent any historical events from being filtered + search_filter.add_room_ids(historical_room_ids) + if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a134e9b3e805..49b3ff4a7119 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,6 +448,7 @@ def get_current_state_ids(self, room_id): Returns: deferred: dict of (type, state_key) -> event_id """ + def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events From cb80db894186c4c9a4991e0623530a038eb82543 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 11:22:00 +0000 Subject: [PATCH 2/8] Add changelog --- changelog.d/4415.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4415.feature diff --git a/changelog.d/4415.feature b/changelog.d/4415.feature new file mode 100644 index 000000000000..1fb1d58f8f23 --- /dev/null +++ b/changelog.d/4415.feature @@ -0,0 +1 @@ +Search now includes results from predecessor rooms after a room upgrade. \ No newline at end of file From c9bfb058d85f6205fada062c78a4d1eca119417c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 11:12:48 +0000 Subject: [PATCH 3/8] Fix a bug with single-room search searching all rooms * Create a new method for getting predecessor rooms * Remove formatting change --- synapse/api/filtering.py | 15 ++++++++++++-- synapse/handlers/search.py | 42 +++++++++----------------------------- synapse/storage/state.py | 29 +++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 84000e64228a..0d8957175d87 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,8 +444,19 @@ def lazy_load_members(self): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) - def add_room_ids(self, room_ids): - self.rooms += room_ids + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. + + Args: + room_ids (list): A list of room_ids. + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. + """ + newFilter = self + newFilter.rooms += room_ids + return newFilter def _matches_wildcard(actual_value, filter_value): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 77e7e4e0fb9b..75c26fe065c1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -49,39 +49,26 @@ def get_old_rooms_from_upgraded_room(self, room_id): The full list of all found rooms in then returned. Args: - room_id (str): The ID of the room to search through. + room_id (str): id of the room to search through. Returns: - dict of past room IDs as strings + Deferred[iterable[str]]: predecessor room ids """ historical_room_ids = [] while True: - state_ids = yield self.store.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) + predecessor = yield self.store.get_room_predecessor(room_id) - # If we can't find the create event, assume we've hit a dead end - if not create_id: - break - - # Retrieve the room's create event - create_event = yield self.store.get_event(create_id) - - if not create_event: - break - - # Check if a predecessor room is present - predecessor = create_event.content.get("predecessor", None) + # If no predecessor, assume we've hit a dead end if not predecessor: break # Add predecessor's room ID - historical_room_id = predecessor["room_id"] - historical_room_ids.append(historical_room_id) + historical_room_ids.append(predecessor["room_id"]) # Scan through the old room for further predecessors - room_id = historical_room_id + room_id = predecessor["room_id"] defer.returnValue(historical_room_ids) @@ -185,28 +172,19 @@ def search(self, user, content, batch=None): ) room_ids = set(r.room_id for r in rooms) - room_ids = search_filter.filter_rooms(room_ids) - # If doing a subset of all rooms seearch, check if any of the rooms # are from an upgraded room, and search their contents as well - # XXX: There is the possibility that we don't have a create event for - # the room in question, in which case we can't return all the results - # we want to. - # Ideally we would just return the results we can get now, and - # try to get more results from other servers in the background. if search_filter.rooms: historical_room_ids = [] - for room_id in room_ids: + for room_id in search_filter.rooms: # Add any previous rooms to the search if they exist ids = yield self.get_old_rooms_from_upgraded_room(room_id) historical_room_ids += ids - # Add any found rooms to the list to search - for historical_room_id in historical_room_ids: - room_ids.add(historical_room_id) - # Prevent any historical events from being filtered - search_filter.add_room_ids(historical_room_ids) + search_filter = search_filter.with_room_ids(historical_room_ids) + + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 49b3ff4a7119..b0646718516d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -437,6 +437,34 @@ def get_room_version(self, room_id): create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. + + Args: + room_id (str) + + Returns: + Deferred[str]: predecessor room id + """ + + state_ids = yield self.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + return None + + # Retrieve the room's create event + create_event = yield self.get_event(create_id) + + if not create_event: + return None + + # Return predecessor if present + return create_event.content.get("predecessor", None) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): """Get the current state event ids for a room based on the @@ -448,7 +476,6 @@ def get_current_state_ids(self, room_id): Returns: deferred: dict of (type, state_key) -> event_id """ - def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events From c433f6109145f0cf6c80dd07ee118b68a3a0cd4e Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:06:36 +0000 Subject: [PATCH 4/8] Ensure new filter is actually created --- synapse/api/filtering.py | 2 +- synapse/storage/state.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 0d8957175d87..f3a056110f50 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -454,7 +454,7 @@ def with_room_ids(self, room_ids): filter: A new filter including the given rooms and the old filter's rooms. """ - newFilter = self + newFilter = Filter(self.filter_json) newFilter.rooms += room_ids return newFilter diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b0646718516d..981d1e360051 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,7 +448,6 @@ def get_room_predecessor(self, room_id): Returns: Deferred[str]: predecessor room id """ - state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) From 277e50462d1422ac8cfe2df7cd2213288f3d14c5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:40:26 +0000 Subject: [PATCH 5/8] Do not return in a deferred function --- synapse/storage/state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 981d1e360051..fceb9744aa1b 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -453,16 +453,16 @@ def get_room_predecessor(self, room_id): # If we can't find the create event, assume we've hit a dead end if not create_id: - return None + defer.returnValue(None) # Retrieve the room's create event create_event = yield self.get_event(create_id) if not create_event: - return None + defer.returnValue(None) # Return predecessor if present - return create_event.content.get("predecessor", None) + defer.returnValue(create_event.content.get("predecessor", None)) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): From 8ea509a9357d53f71e7bef09aae59f53b9f2317e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:21:35 +0000 Subject: [PATCH 6/8] Update synapse/api/filtering.py Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/api/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index f3a056110f50..3906475403ac 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -448,7 +448,7 @@ def with_room_ids(self, room_ids): """Returns a new filter with the given room IDs appended. Args: - room_ids (list): A list of room_ids. + room_ids (iterable[unicode]): The room_ids to add Returns: filter: A new filter including the given rooms and the old From 03c85335d1d386c0523af3b6bf992f83bfb905d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:22:09 +0000 Subject: [PATCH 7/8] Apply suggestions from code review Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/search.py | 2 +- synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 75c26fe065c1..49c439313ec2 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -52,7 +52,7 @@ def get_old_rooms_from_upgraded_room(self, room_id): room_id (str): id of the room to search through. Returns: - Deferred[iterable[str]]: predecessor room ids + Deferred[iterable[unicode]]: predecessor room ids """ historical_room_ids = [] diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa1b..0a0691cd0001 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -446,7 +446,7 @@ def get_room_predecessor(self, room_id): room_id (str) Returns: - Deferred[str]: predecessor room id + Deferred[unicode|None]: predecessor room id """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) From e1781b043b4a73e1b22142b6e09c07ddd782ae57 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 24 Jan 2019 17:23:39 +0000 Subject: [PATCH 8/8] Remove redundant create event None check --- synapse/storage/state.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa1b..c02130d9d621 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -458,9 +458,6 @@ def get_room_predecessor(self, room_id): # Retrieve the room's create event create_event = yield self.get_event(create_id) - if not create_event: - defer.returnValue(None) - # Return predecessor if present defer.returnValue(create_event.content.get("predecessor", None))