Make purge_history operate on tokens #3221
Changes from all commits
37dbee6
5f27ed7
c945af8
43e6e82
680530c
@@ -33,7 +33,7 @@ | |
from synapse.api.constants import EventTypes | ||
from synapse.api.errors import SynapseError | ||
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks | ||
from synapse.types import get_domain_from_id | ||
from synapse.types import get_domain_from_id, RoomStreamToken | ||
import synapse.metrics | ||
|
||
# these are only included to make the type annotations work | ||
|
@@ -1803,15 +1803,14 @@ def get_all_new_events_txn(txn): | |
return self.runInteraction("get_all_new_events", get_all_new_events_txn) | ||
|
||
def purge_history( | ||
self, room_id, topological_ordering, delete_local_events, | ||
self, room_id, token, delete_local_events, | ||
): | ||
"""Deletes room history before a certain point | ||
Args: | ||
room_id (str): | ||
topological_ordering (int): | ||
minimum topo ordering to preserve | ||
token (str): A topological token to delete events before | ||
delete_local_events (bool): | ||
if True, we will delete local events as well as remote ones | ||
|
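As a rough sketch of the new calling convention (the caller shown is hypothetical and the token value invented; only the shape of the arguments comes from this diff), the storage layer is now handed an opaque topological token string rather than a bare integer:

    from twisted.internet import defer

    @defer.inlineCallbacks
    def _purge(self, room_id):
        # hypothetical caller; "t100-200" is an illustrative topological token
        yield self.store.purge_history(
            room_id, "t100-200", delete_local_events=False,
        )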
@@ -1821,13 +1820,15 @@ def purge_history( | |
|
||
return self.runInteraction( | ||
"purge_history", | ||
self._purge_history_txn, room_id, topological_ordering, | ||
self._purge_history_txn, room_id, token, | ||
delete_local_events, | ||
) | ||
|
||
def _purge_history_txn( | ||
self, txn, room_id, topological_ordering, delete_local_events, | ||
self, txn, room_id, token_str, delete_local_events, | ||
): | ||
token = RoomStreamToken.parse(token_str) | ||
|
||
# Tables that should be pruned: | ||
# event_auth | ||
# event_backward_extremities | ||
|
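For context, a minimal sketch of how that token string is interpreted, assuming the RoomStreamToken behaviour in synapse.types at the time of this change (the concrete values are invented):

    from synapse.types import RoomStreamToken

    # a topological token has the form "t<topological>-<stream>"
    token = RoomStreamToken.parse("t123-456")
    assert token.topological == 123   # the cut-off used by the purge below
    assert token.stream == 456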
@@ -1872,6 +1873,13 @@ def _purge_history_txn( | |
" ON events_to_purge(should_delete)", | ||
) | ||
|
||
# We do joins against events_to_purge for e.g. calculating state | ||
# groups to purge, etc., so let's make an index. | ||
txn.execute( | ||
"CREATE INDEX events_to_purge_id" | ||
" ON events_to_purge(event_id)", | ||
) | ||
|
||
# First ensure that we're not about to delete all the forward extremities | ||
txn.execute( | ||
"SELECT e.event_id, e.depth FROM events as e " | ||
|
@@ -1884,7 +1892,7 @@ def _purge_history_txn( | |
rows = txn.fetchall() | ||
max_depth = max(row[0] for row in rows) | ||
|
||
if max_depth <= topological_ordering: | ||
if max_depth <= token.topological: | ||
# We need to ensure we don't delete all the events from the database | ||
# otherwise we wouldn't be able to send any events (due to not | ||
# having any backwards extremities) | ||
|
@@ -1900,7 +1908,7 @@ def _purge_history_txn( | |
should_delete_expr += " AND event_id NOT LIKE ?" | ||
should_delete_params += ("%:" + self.hs.hostname, ) | ||
|
||
should_delete_params += (room_id, topological_ordering) | ||
should_delete_params += (room_id, token.topological) | ||
|
||
txn.execute( | ||
"INSERT INTO events_to_purge" | ||
|
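A small sketch of what the delete_local_events guard amounts to (the hostname and event IDs are invented, and this ignores the other conditions that make up should_delete_expr): with delete_local_events=False, only events whose IDs do not end in the local server name are eligible, mirroring the NOT LIKE clause above:

    hostname = "example.com"   # hypothetical local server name

    def purgeable(event_id, delete_local_events=False):
        # mirrors "event_id NOT LIKE '%:<hostname>'"
        return delete_local_events or not event_id.endswith(":" + hostname)

    assert purgeable("$170000abcde:matrix.org")
    assert not purgeable("$170000abcde:example.com")
    assert purgeable("$170000abcde:example.com", delete_local_events=True)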
@@ -1923,13 +1931,13 @@ def _purge_history_txn( | |
logger.info("[purge] Finding new backward extremities") | ||
|
||
# We calculate the new entries for the backward extremities by finding | ||
# all events that point to events that are to be purged | ||
# events to be purged that are pointed to by events we're not going to | ||
# purge. | ||
txn.execute( | ||
"SELECT DISTINCT e.event_id FROM events_to_purge AS e" | ||
" INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id" | ||
" INNER JOIN events AS e2 ON e2.event_id = ed.event_id" | ||
" WHERE e2.topological_ordering >= ?", | ||
(topological_ordering, ) | ||
" LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id" | ||
" WHERE ep2.event_id IS NULL", | ||
) | ||
new_backwards_extrems = txn.fetchall() | ||
|
||
|
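To make the new backward-extremity selection easier to follow, here is a toy, set-based restatement of the anti-join above (the event IDs and edges are invented):

    # an event becomes a new backward extremity if it is being purged but is
    # still listed as a prev_event by some event we are keeping
    events_to_purge = {"$a", "$b"}
    event_edges = {("$c", "$a"), ("$b", "$a")}   # (event_id, prev_event_id)

    new_backward_extrems = {
        prev for (ev, prev) in event_edges
        if prev in events_to_purge and ev not in events_to_purge
    }
    assert new_backward_extrems == {"$a"}   # $c survives and still points at $a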
@@ -1953,16 +1961,22 @@ def _purge_history_txn( | |
|
||
# Get all state groups that are only referenced by events that are | ||
# to be deleted. | ||
txn.execute( | ||
"SELECT state_group FROM event_to_state_groups" | ||
" INNER JOIN events USING (event_id)" | ||
" WHERE state_group IN (" | ||
" SELECT DISTINCT state_group FROM events_to_purge" | ||
" INNER JOIN event_to_state_groups USING (event_id)" | ||
" )" | ||
" GROUP BY state_group HAVING MAX(topological_ordering) < ?", | ||
(topological_ordering, ) | ||
) | ||
# This works by first getting state groups that we may want to delete, | ||
# joining against event_to_state_groups to get events that use that | ||
# state group, then left joining against events_to_purge again. Any | ||
# state group where the left join produces *no nulls* is referenced | ||
# only by events that are going to be purged. | ||
txn.execute(""" | ||
SELECT state_group FROM | ||
( | ||
SELECT DISTINCT state_group FROM events_to_purge | ||
INNER JOIN event_to_state_groups USING (event_id) | ||
) AS sp | ||
INNER JOIN event_to_state_groups USING (state_group) | ||
LEFT JOIN events_to_purge AS ep USING (event_id) | ||
GROUP BY state_group | ||
HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0 | ||
""") | ||
|
||
state_rows = txn.fetchall() | ||
logger.info("[purge] found %i redundant state groups", len(state_rows)) | ||
|
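Likewise, a toy restatement of the redundant state group query (the mapping is invented): a state group is redundant exactly when every event referencing it is itself being purged, which is what the HAVING SUM(CASE WHEN ep.event_id IS NULL ...) = 0 clause expresses:

    events_to_purge = {"$a", "$b"}
    event_to_state_groups = {"$a": 1, "$b": 1, "$c": 2}   # event_id -> state_group

    candidates = {sg for ev, sg in event_to_state_groups.items()
                  if ev in events_to_purge}
    redundant = {
        sg for sg in candidates
        if all(ev in events_to_purge
               for ev, g in event_to_state_groups.items() if g == sg)
    }
    assert redundant == {1}   # group 2 is also used by $c, which is not purged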
@@ -2109,10 +2123,25 @@ def _purge_history_txn( | |
# | ||
# So, let's stick it at the end so that we don't block event | ||
# persistence. | ||
logger.info("[purge] updating room_depth") | ||
# | ||
# We do this by calculating the minimum depth of the backwards | ||
# extremities. However, the events in event_backward_extremities | ||
# are ones we don't have yet, so we need to look at the events that | ||
# point to them via the event_edges table. | ||
txn.execute(""" | ||
SELECT COALESCE(MIN(depth), 0) | ||
FROM event_backward_extremities AS eb | ||
INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id | ||
INNER JOIN events AS e ON e.event_id = eg.event_id | ||
is this looking for the prev_event before the extremity? which we have likely just purged?
This is looking at the events that point to the extremity, as we don't actually have the extremity events.
(And I think the check is the right way round?)
oh right. ehm. I'm not convinced this is going to calculate the right thing in the presence of disconnected bits of dag.
Each disconnected part of the DAG will have their backwards extremities in the
||
WHERE eb.room_id = ? | ||
""", (room_id,)) | ||
min_depth, = txn.fetchone() | ||
|
||
logger.info("[purge] updating room_depth to %d", min_depth) | ||
|
||
txn.execute( | ||
"UPDATE room_depth SET min_depth = ? WHERE room_id = ?", | ||
(topological_ordering, room_id,) | ||
(min_depth, room_id,) | ||
) | ||
|
||
# finally, drop the temp table. this will commit the txn in sqlite, | ||
|
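And a sketch of the room_depth recalculation (values invented): take the smallest depth among the events we still hold that point at a backward extremity, falling back to 0 as COALESCE(MIN(depth), 0) does:

    backward_extremities = {"$gone"}                   # events we do not have locally
    event_edges = {("$x", "$gone"), ("$y", "$gone")}   # (event_id, prev_event_id)
    depths = {"$x": 10, "$y": 7}

    candidates = [depths[ev] for ev, prev in event_edges
                  if prev in backward_extremities]
    min_depth = min(candidates) if candidates else 0
    assert min_depth == 7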
the comment on this doesn't really seem to be true any more.
How so?
we now look for events we are about to purge, which point to events we are not going to purge. it's completely different.
OK, it's different in the sense that we're not returning the event that points to purged events but the purged events that are pointed at, yes. (And this isn't anything to do with the change in the SQL, and was wrong before.)