diff --git a/changelog.d/4415.feature b/changelog.d/4415.feature new file mode 100644 index 000000000000..1fb1d58f8f23 --- /dev/null +++ b/changelog.d/4415.feature @@ -0,0 +1 @@ +Search now includes results from predecessor rooms after a room upgrade. \ No newline at end of file diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 16ad65486480..3906475403ac 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,6 +444,20 @@ def lazy_load_members(self): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. + + Args: + room_ids (iterable[unicode]): The room_ids to add + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. + """ + newFilter = Filter(self.filter_json) + newFilter.rooms += room_ids + return newFilter + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ec936bbb4e1f..49c439313ec2 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -37,6 +37,41 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms in then returned. + + Args: + room_id (str): id of the room to search through. + + Returns: + Deferred[iterable[unicode]]: predecessor room ids + """ + + historical_room_ids = [] + + while True: + predecessor = yield self.store.get_room_predecessor(room_id) + + # If no predecessor, assume we've hit a dead end + if not predecessor: + break + + # Add predecessor's room ID + historical_room_ids.append(predecessor["room_id"]) + + # Scan through the old room for further predecessors + room_id = predecessor["room_id"] + + defer.returnValue(historical_room_ids) + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user. @@ -137,6 +172,18 @@ def search(self, user, content, batch=None): ) room_ids = set(r.room_id for r in rooms) + # If doing a subset of all rooms seearch, check if any of the rooms + # are from an upgraded room, and search their contents as well + if search_filter.rooms: + historical_room_ids = [] + for room_id in search_filter.rooms: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Prevent any historical events from being filtered + search_filter = search_filter.with_room_ids(historical_room_ids) + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a134e9b3e805..c3ab7db7ae4e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -437,6 +437,30 @@ def get_room_version(self, room_id): create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. + + Args: + room_id (str) + + Returns: + Deferred[unicode|None]: predecessor room id + """ + state_ids = yield self.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + defer.returnValue(None) + + # Retrieve the room's create event + create_event = yield self.get_event(create_id) + + # Return predecessor if present + defer.returnValue(create_event.content.get("predecessor", None)) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): """Get the current state event ids for a room based on the