Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Make get_state_groups_from_groups faster. #2230

Merged
merged 1 commit into from
May 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 11 additions & 29 deletions synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,20 +563,22 @@ def _get_some_state_from_cache(self, group, types):
where a `state_key` of `None` matches all state_keys for the
`type`.
"""
is_all, state_dict_ids = self._state_group_cache.get(group)
is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)

type_to_key = {}
missing_types = set()

for typ, state_key in types:
key = (typ, state_key)
if state_key is None:
type_to_key[typ] = None
missing_types.add((typ, state_key))
missing_types.add(key)
else:
if type_to_key.get(typ, object()) is not None:
type_to_key.setdefault(typ, set()).add(state_key)

if (typ, state_key) not in state_dict_ids:
missing_types.add((typ, state_key))
if key not in state_dict_ids and key not in known_absent:
missing_types.add(key)

sentinel = object()

Expand All @@ -590,7 +592,7 @@ def include(typ, state_key):
return True
return False

got_all = not (missing_types or types is None)
got_all = is_all or not missing_types

return {
k: v for k, v in state_dict_ids.iteritems()
Expand All @@ -607,7 +609,7 @@ def _get_all_state_from_cache(self, group):
Args:
group: The state group to lookup
"""
is_all, state_dict_ids = self._state_group_cache.get(group)
is_all, _, state_dict_ids = self._state_group_cache.get(group)

return state_dict_ids, is_all

Expand All @@ -624,7 +626,7 @@ def _get_state_for_groups(self, groups, types=None):
missing_groups = []
if types is not None:
for group in set(groups):
state_dict_ids, missing_types, got_all = self._get_some_state_from_cache(
state_dict_ids, _, got_all = self._get_some_state_from_cache(
group, types
)
results[group] = state_dict_ids
Expand Down Expand Up @@ -653,19 +655,7 @@ def _get_state_for_groups(self, groups, types=None):
# Now we want to update the cache with all the things we fetched
# from the database.
for group, group_state_dict in group_to_state_dict.iteritems():
if types:
# We delibrately put key -> None mappings into the cache to
# cache absence of the key, on the assumption that if we've
# explicitly asked for some types then we will probably ask
# for them again.
state_dict = {
(intern_string(etype), intern_string(state_key)): None
for (etype, state_key) in types
}
state_dict.update(results[group])
results[group] = state_dict
else:
state_dict = results[group]
state_dict = results[group]

state_dict.update(
((intern_string(k[0]), intern_string(k[1])), to_ascii(v))
Expand All @@ -677,17 +667,9 @@ def _get_state_for_groups(self, groups, types=None):
key=group,
value=state_dict,
full=(types is None),
known_absent=types,
)

# Remove all the entries with None values. The None values were just
# used for bookkeeping in the cache.
for group, state_dict in results.iteritems():
results[group] = {
key: event_id
for key, event_id in state_dict.iteritems()
if event_id
}

defer.returnValue(results)

def get_next_state_group(self):
Expand Down
57 changes: 46 additions & 11 deletions synapse/util/caches/dictionary_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@
logger = logging.getLogger(__name__)


class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
"""Returned when getting an entry from the cache

Attributes:
full (bool): Whether the cache has the full or dict or just some keys.
If not full then not all requested keys will necessarily be present
in `value`
known_absent (set): Keys that were looked up in the dict and were not
there.
value (dict): The full or partial dict value
"""
def __len__(self):
return len(self.value)

Expand Down Expand Up @@ -58,21 +68,31 @@ def check_thread(self):
)

def get(self, key, dict_keys=None):
"""Fetch an entry out of the cache

Args:
key
dict_key(list): If given a set of keys then return only those keys
that exist in the cache.

Returns:
DictionaryEntry
"""
entry = self.cache.get(key, self.sentinel)
if entry is not self.sentinel:
self.metrics.inc_hits()

if dict_keys is None:
return DictionaryEntry(entry.full, dict(entry.value))
return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
else:
return DictionaryEntry(entry.full, {
return DictionaryEntry(entry.full, entry.known_absent, {
k: entry.value[k]
for k in dict_keys
if k in entry.value
})

self.metrics.inc_misses()
return DictionaryEntry(False, {})
return DictionaryEntry(False, set(), {})

def invalidate(self, key):
self.check_thread()
Expand All @@ -87,19 +107,34 @@ def invalidate_all(self):
self.sequence += 1
self.cache.clear()

def update(self, sequence, key, value, full=False):
def update(self, sequence, key, value, full=False, known_absent=None):
"""Updates the entry in the cache

Args:
sequence
key
value (dict): The value to update the cache with.
full (bool): Whether the given value is the full dict, or just a
partial subset there of. If not full then any existing entries
for the key will be updated.
known_absent (set): Set of keys that we know don't exist in the full
dict.
"""
self.check_thread()
if self.sequence == sequence:
# Only update the cache if the caches sequence number matches the
# number that the cache had before the SELECT was started (SYN-369)
if known_absent is None:
known_absent = set()
if full:
self._insert(key, value)
self._insert(key, value, known_absent)
else:
self._update_or_insert(key, value)
self._update_or_insert(key, value, known_absent)

def _update_or_insert(self, key, value):
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
def _update_or_insert(self, key, value, known_absent):
entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
entry.value.update(value)
entry.known_absent.update(known_absent)

def _insert(self, key, value):
self.cache[key] = DictionaryEntry(True, value)
def _insert(self, key, value, known_absent):
self.cache[key] = DictionaryEntry(True, known_absent, value)
2 changes: 1 addition & 1 deletion tests/util/test_dict_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_simple_cache_hit_full(self):
key = "test_simple_cache_hit_full"

v = self.cache.get(key)
self.assertEqual((False, {}), v)
self.assertEqual((False, set(), {}), v)

seq = self.cache.sequence
test_value = {"test": "test_simple_cache_hit_full"}
Expand Down