Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #2230 from matrix-org/erikj/speed_up_get_state
Browse files Browse the repository at this point in the history
Make get_state_groups_from_groups faster.
  • Loading branch information
erikjohnston authored May 17, 2017
2 parents ace2346 + bbfe4e9 commit d9e3a4b
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 41 deletions.
40 changes: 11 additions & 29 deletions synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,20 +563,22 @@ def _get_some_state_from_cache(self, group, types):
where a `state_key` of `None` matches all state_keys for the
`type`.
"""
is_all, state_dict_ids = self._state_group_cache.get(group)
is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)

type_to_key = {}
missing_types = set()

for typ, state_key in types:
key = (typ, state_key)
if state_key is None:
type_to_key[typ] = None
missing_types.add((typ, state_key))
missing_types.add(key)
else:
if type_to_key.get(typ, object()) is not None:
type_to_key.setdefault(typ, set()).add(state_key)

if (typ, state_key) not in state_dict_ids:
missing_types.add((typ, state_key))
if key not in state_dict_ids and key not in known_absent:
missing_types.add(key)

sentinel = object()

Expand All @@ -590,7 +592,7 @@ def include(typ, state_key):
return True
return False

got_all = not (missing_types or types is None)
got_all = is_all or not missing_types

return {
k: v for k, v in state_dict_ids.iteritems()
Expand All @@ -607,7 +609,7 @@ def _get_all_state_from_cache(self, group):
Args:
group: The state group to lookup
"""
is_all, state_dict_ids = self._state_group_cache.get(group)
is_all, _, state_dict_ids = self._state_group_cache.get(group)

return state_dict_ids, is_all

Expand All @@ -624,7 +626,7 @@ def _get_state_for_groups(self, groups, types=None):
missing_groups = []
if types is not None:
for group in set(groups):
state_dict_ids, missing_types, got_all = self._get_some_state_from_cache(
state_dict_ids, _, got_all = self._get_some_state_from_cache(
group, types
)
results[group] = state_dict_ids
Expand Down Expand Up @@ -653,19 +655,7 @@ def _get_state_for_groups(self, groups, types=None):
# Now we want to update the cache with all the things we fetched
# from the database.
for group, group_state_dict in group_to_state_dict.iteritems():
if types:
# We delibrately put key -> None mappings into the cache to
# cache absence of the key, on the assumption that if we've
# explicitly asked for some types then we will probably ask
# for them again.
state_dict = {
(intern_string(etype), intern_string(state_key)): None
for (etype, state_key) in types
}
state_dict.update(results[group])
results[group] = state_dict
else:
state_dict = results[group]
state_dict = results[group]

state_dict.update(
((intern_string(k[0]), intern_string(k[1])), to_ascii(v))
Expand All @@ -677,17 +667,9 @@ def _get_state_for_groups(self, groups, types=None):
key=group,
value=state_dict,
full=(types is None),
known_absent=types,
)

# Remove all the entries with None values. The None values were just
# used for bookkeeping in the cache.
for group, state_dict in results.iteritems():
results[group] = {
key: event_id
for key, event_id in state_dict.iteritems()
if event_id
}

defer.returnValue(results)

def get_next_state_group(self):
Expand Down
57 changes: 46 additions & 11 deletions synapse/util/caches/dictionary_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@
logger = logging.getLogger(__name__)


class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
"""Returned when getting an entry from the cache
Attributes:
full (bool): Whether the cache has the full or dict or just some keys.
If not full then not all requested keys will necessarily be present
in `value`
known_absent (set): Keys that were looked up in the dict and were not
there.
value (dict): The full or partial dict value
"""
def __len__(self):
return len(self.value)

Expand Down Expand Up @@ -58,21 +68,31 @@ def check_thread(self):
)

def get(self, key, dict_keys=None):
"""Fetch an entry out of the cache
Args:
key
dict_key(list): If given a set of keys then return only those keys
that exist in the cache.
Returns:
DictionaryEntry
"""
entry = self.cache.get(key, self.sentinel)
if entry is not self.sentinel:
self.metrics.inc_hits()

if dict_keys is None:
return DictionaryEntry(entry.full, dict(entry.value))
return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
else:
return DictionaryEntry(entry.full, {
return DictionaryEntry(entry.full, entry.known_absent, {
k: entry.value[k]
for k in dict_keys
if k in entry.value
})

self.metrics.inc_misses()
return DictionaryEntry(False, {})
return DictionaryEntry(False, set(), {})

def invalidate(self, key):
self.check_thread()
Expand All @@ -87,19 +107,34 @@ def invalidate_all(self):
self.sequence += 1
self.cache.clear()

def update(self, sequence, key, value, full=False):
def update(self, sequence, key, value, full=False, known_absent=None):
"""Updates the entry in the cache
Args:
sequence
key
value (dict): The value to update the cache with.
full (bool): Whether the given value is the full dict, or just a
partial subset there of. If not full then any existing entries
for the key will be updated.
known_absent (set): Set of keys that we know don't exist in the full
dict.
"""
self.check_thread()
if self.sequence == sequence:
# Only update the cache if the caches sequence number matches the
# number that the cache had before the SELECT was started (SYN-369)
if known_absent is None:
known_absent = set()
if full:
self._insert(key, value)
self._insert(key, value, known_absent)
else:
self._update_or_insert(key, value)
self._update_or_insert(key, value, known_absent)

def _update_or_insert(self, key, value):
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
def _update_or_insert(self, key, value, known_absent):
entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
entry.value.update(value)
entry.known_absent.update(known_absent)

def _insert(self, key, value):
self.cache[key] = DictionaryEntry(True, value)
def _insert(self, key, value, known_absent):
self.cache[key] = DictionaryEntry(True, known_absent, value)
2 changes: 1 addition & 1 deletion tests/util/test_dict_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_simple_cache_hit_full(self):
key = "test_simple_cache_hit_full"

v = self.cache.get(key)
self.assertEqual((False, {}), v)
self.assertEqual((False, set(), {}), v)

seq = self.cache.sequence
test_value = {"test": "test_simple_cache_hit_full"}
Expand Down

0 comments on commit d9e3a4b

Please sign in to comment.