From 10ea3f46ba3eda2f7c220a5e5902b687feb3042c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jun 2016 17:55:32 +0100 Subject: [PATCH 1/2] Change the way we cache events --- synapse/storage/events.py | 80 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b710505a7e16..779743b8f901 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -139,6 +139,9 @@ def _get_drainining_queue(self, room_id): pass +_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) + + class EventsStore(SQLBaseStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" @@ -741,7 +744,6 @@ def _get_events(self, event_ids, check_redacted=True, event_map = self._get_events_from_cache( event_ids, check_redacted=check_redacted, - get_prev_content=get_prev_content, allow_rejected=allow_rejected, ) @@ -751,40 +753,49 @@ def _get_events(self, event_ids, check_redacted=True, missing_events = yield self._enqueue_events( missing_events_ids, check_redacted=check_redacted, - get_prev_content=get_prev_content, allow_rejected=allow_rejected, ) event_map.update(missing_events) - defer.returnValue([ + events = [ event_map[e_id] for e_id in event_id_list if e_id in event_map and event_map[e_id] - ]) + ] + + if get_prev_content: + for event in events: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) def _invalidate_get_event_cache(self, event_id): - for check_redacted in (False, True): - for get_prev_content in (False, True): - self._get_event_cache.invalidate( - (event_id, check_redacted, get_prev_content) - ) + self._get_event_cache.invalidate((event_id,)) - def _get_events_from_cache(self, events, check_redacted, get_prev_content, - allow_rejected): + def _get_events_from_cache(self, events, check_redacted, allow_rejected): event_map = {} for event_id in events: - try: - ret = self._get_event_cache.get( - (event_id, check_redacted, get_prev_content,) - ) + ret = self._get_event_cache.get((event_id,), None) + if not ret: + continue - if allow_rejected or not ret.rejected_reason: - event_map[event_id] = ret + if allow_rejected or not ret.event.rejected_reason: + if check_redacted and ret.redacted_event: + event_map[event_id] = ret.redacted_event else: - event_map[event_id] = None - except KeyError: - pass + event_map[event_id] = ret.event + else: + event_map[event_id] = None return event_map @@ -855,8 +866,7 @@ def fire(evs): reactor.callFromThread(fire, event_list) @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, - get_prev_content=False, allow_rejected=False): + def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. @@ -895,7 +905,6 @@ def _enqueue_events(self, events, check_redacted=True, preserve_fn(self._get_event_from_row)( row["internal_metadata"], row["json"], row["redacts"], check_redacted=check_redacted, - get_prev_content=get_prev_content, rejected_reason=row["rejects"], ) for row in rows @@ -936,8 +945,7 @@ def _fetch_event_rows(self, txn, events): @defer.inlineCallbacks def _get_event_from_row(self, internal_metadata, js, redacted, - check_redacted=True, get_prev_content=False, - rejected_reason=None): + check_redacted=True, rejected_reason=None): d = json.loads(js) internal_metadata = json.loads(internal_metadata) @@ -949,14 +957,17 @@ def _get_event_from_row(self, internal_metadata, js, redacted, desc="_get_event_from_row", ) - ev = FrozenEvent( + original_ev = FrozenEvent( d, internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, ) + ev = original_ev + redacted_event = None if check_redacted and redacted: ev = prune_event(ev) + redacted_event = ev redaction_id = yield self._simple_select_one_onecol( table="redactions", @@ -979,19 +990,10 @@ def _get_event_from_row(self, internal_metadata, js, redacted, # will serialise this field correctly ev.unsigned["redacted_because"] = because - if get_prev_content and "replaces_state" in ev.unsigned: - prev = yield self.get_event( - ev.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - ev.unsigned["prev_content"] = prev.content - ev.unsigned["prev_sender"] = prev.sender - - self._get_event_cache.prefill( - (ev.event_id, check_redacted, get_prev_content), ev - ) + self._get_event_cache.prefill((ev.event_id,), _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + )) defer.returnValue(ev) From cffe46408f40db082df76adc263cf5014031ae54 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jun 2016 18:25:21 +0100 Subject: [PATCH 2/2] Don't rely on options when inserting event into cache --- synapse/storage/events.py | 83 ++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 779743b8f901..5db24e86f97f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -741,13 +741,12 @@ def _get_events(self, event_ids, check_redacted=True, event_id_list = event_ids event_ids = set(event_ids) - event_map = self._get_events_from_cache( + event_entry_map = self._get_events_from_cache( event_ids, - check_redacted=check_redacted, allow_rejected=allow_rejected, ) - missing_events_ids = [e for e in event_ids if e not in event_map] + missing_events_ids = [e for e in event_ids if e not in event_entry_map] if missing_events_ids: missing_events = yield self._enqueue_events( @@ -756,32 +755,40 @@ def _get_events(self, event_ids, check_redacted=True, allow_rejected=allow_rejected, ) - event_map.update(missing_events) + event_entry_map.update(missing_events) - events = [ - event_map[e_id] for e_id in event_id_list - if e_id in event_map and event_map[e_id] - ] + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue - if get_prev_content: - for event in events: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender defer.returnValue(events) def _invalidate_get_event_cache(self, event_id): self._get_event_cache.invalidate((event_id,)) - def _get_events_from_cache(self, events, check_redacted, allow_rejected): + def _get_events_from_cache(self, events, allow_rejected): event_map = {} for event_id in events: @@ -790,10 +797,7 @@ def _get_events_from_cache(self, events, check_redacted, allow_rejected): continue if allow_rejected or not ret.event.rejected_reason: - if check_redacted and ret.redacted_event: - event_map[event_id] = ret.redacted_event - else: - event_map[event_id] = ret.event + event_map[event_id] = ret else: event_map[event_id] = None @@ -904,7 +908,6 @@ def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): [ preserve_fn(self._get_event_from_row)( row["internal_metadata"], row["json"], row["redacts"], - check_redacted=check_redacted, rejected_reason=row["rejects"], ) for row in rows @@ -913,7 +916,7 @@ def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): ) defer.returnValue({ - e.event_id: e + e.event.event_id: e for e in res if e }) @@ -945,7 +948,7 @@ def _fetch_event_rows(self, txn, events): @defer.inlineCallbacks def _get_event_from_row(self, internal_metadata, js, redacted, - check_redacted=True, rejected_reason=None): + rejected_reason=None): d = json.loads(js) internal_metadata = json.loads(internal_metadata) @@ -954,7 +957,7 @@ def _get_event_from_row(self, internal_metadata, js, redacted, table="rejections", keyvalues={"event_id": rejected_reason}, retcol="reason", - desc="_get_event_from_row", + desc="_get_event_from_row_rejected_reason", ) original_ev = FrozenEvent( @@ -963,20 +966,18 @@ def _get_event_from_row(self, internal_metadata, js, redacted, rejected_reason=rejected_reason, ) - ev = original_ev redacted_event = None - if check_redacted and redacted: - ev = prune_event(ev) - redacted_event = ev + if redacted: + redacted_event = prune_event(original_ev) redaction_id = yield self._simple_select_one_onecol( table="redactions", - keyvalues={"redacts": ev.event_id}, + keyvalues={"redacts": redacted_event.event_id}, retcol="event_id", - desc="_get_event_from_row", + desc="_get_event_from_row_redactions", ) - ev.unsigned["redacted_by"] = redaction_id + redacted_event.unsigned["redacted_by"] = redaction_id # Get the redaction event. because = yield self.get_event( @@ -988,14 +989,16 @@ def _get_event_from_row(self, internal_metadata, js, redacted, if because: # It's fine to do add the event directly, since get_pdu_json # will serialise this field correctly - ev.unsigned["redacted_because"] = because + redacted_event.unsigned["redacted_because"] = because - self._get_event_cache.prefill((ev.event_id,), _EventCacheEntry( + cache_entry = _EventCacheEntry( event=original_ev, redacted_event=redacted_event, - )) + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - defer.returnValue(ev) + defer.returnValue(cache_entry) @defer.inlineCallbacks def count_daily_messages(self):