Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Revert the deletion of stale devices due to performance issues. #14662

Merged
merged 3 commits into from
Dec 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion changelog.d/14595.misc

This file was deleted.

1 change: 0 additions & 1 deletion changelog.d/14649.misc

This file was deleted.

1 change: 1 addition & 0 deletions changelog.d/14662.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(remove from changelog: unreleased) Revert the deletion of stale devices due to performance issues.
33 changes: 1 addition & 32 deletions synapse/handlers/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
from synapse.util.async_helpers import Linearizer
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.cancellation import cancellable
from synapse.util.iterutils import batch_iter
from synapse.util.metrics import measure_func
from synapse.util.retryutils import NotRetryingDestination

Expand Down Expand Up @@ -422,9 +421,6 @@ async def check_device_registered(

self._check_device_name_length(initial_device_display_name)

# Prune the user's device list if they already have a lot of devices.
await self._prune_too_many_devices(user_id)

if device_id is not None:
new_device = await self.store.store_device(
user_id=user_id,
Expand Down Expand Up @@ -456,33 +452,6 @@ async def check_device_registered(

raise errors.StoreError(500, "Couldn't generate a device ID.")

async def _prune_too_many_devices(self, user_id: str) -> None:
"""Delete any excess old devices this user may have."""
device_ids = await self.store.check_too_many_devices_for_user(user_id, 100)
if not device_ids:
return

logger.info("Pruning %d old devices for user %s", len(device_ids), user_id)

# We don't want to block and try and delete tonnes of devices at once,
# so we cap the number of devices we delete synchronously.
first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
await self.delete_devices(user_id, first_batch)

if not remaining_device_ids:
return

# Now spawn a background loop that deletes the rest.
async def _prune_too_many_devices_loop() -> None:
for batch in batch_iter(remaining_device_ids, 10):
await self.delete_devices(user_id, batch)

await self.clock.sleep(1)

run_as_background_process(
"_prune_too_many_devices_loop", _prune_too_many_devices_loop
)

async def _delete_stale_devices(self) -> None:
"""Background task that deletes devices which haven't been accessed for more than
a configured time period.
Expand Down Expand Up @@ -512,7 +481,7 @@ async def delete_all_devices_for_user(
device_ids = [d for d in device_ids if d != except_device_id]
await self.delete_devices(user_id, device_ids)

async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
"""Delete several devices

Args:
Expand Down
84 changes: 1 addition & 83 deletions synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1569,77 +1569,6 @@ def _txn(txn: LoggingTransaction) -> int:

return rows

async def check_too_many_devices_for_user(
self, user_id: str, limit: int
) -> List[str]:
"""Check if the user has a lot of devices, and if so return the set of
devices we can prune.

This does *not* return hidden devices or devices with E2E keys.

Returns at most `limit` number of devices, ordered by last seen.
"""

num_devices = await self.db_pool.simple_select_one_onecol(
table="devices",
keyvalues={"user_id": user_id, "hidden": False},
retcol="COALESCE(COUNT(*), 0)",
desc="count_devices",
)

# We let users have up to ten devices without pruning.
if num_devices <= 10:
return []

# We prune everything older than N days.
max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000

if num_devices > 50:
# If the user has more than 50 devices, then we chose a last seen
# that ensures we keep at most 50 devices.
sql = """
SELECT last_seen FROM devices
LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
WHERE
user_id = ?
AND NOT hidden
AND last_seen IS NOT NULL
AND key_json IS NULL
ORDER BY last_seen DESC
LIMIT 1
OFFSET 50
"""

rows = await self.db_pool.execute(
"check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
)
if rows:
max_last_seen = max(rows[0][0], max_last_seen)

# Now fetch the devices to delete.
sql = """
SELECT device_id FROM devices
LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
WHERE
user_id = ?
AND NOT hidden
AND last_seen < ?
AND key_json IS NULL
ORDER BY last_seen
LIMIT ?
"""

def check_too_many_devices_for_user_txn(
txn: LoggingTransaction,
) -> List[str]:
txn.execute(sql, (user_id, max_last_seen, limit))
return [device_id for device_id, in txn]

return await self.db_pool.runInteraction(
"check_too_many_devices_for_user",
check_too_many_devices_for_user_txn,
)


class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
# Because we have write access, this will be a StreamIdGenerator
Expand Down Expand Up @@ -1698,7 +1627,6 @@ async def store_device(
values={},
insertion_values={
"display_name": initial_device_display_name,
"last_seen": self._clock.time_msec(),
"hidden": False,
},
desc="store_device",
Expand Down Expand Up @@ -1744,15 +1672,7 @@ async def store_device(
)
raise StoreError(500, "Problem storing device.")

@cached(max_entries=0)
async def delete_device(self, user_id: str, device_id: str) -> None:
raise NotImplementedError()

# Note: sometimes deleting rows out of `device_inbox` can take a long time,
# so we use a cache so that we deduplicate in flight requests to delete
# devices.
@cachedList(cached_method_name="delete_device", list_name="device_ids")
async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
"""Deletes several devices.

Args:
Expand Down Expand Up @@ -1789,8 +1709,6 @@ def _delete_devices_txn(txn: LoggingTransaction) -> None:
for device_id in device_ids:
self.device_id_exists_cache.invalidate((user_id, device_id))

return {}

async def update_device(
self, user_id: str, device_id: str, new_display_name: Optional[str] = None
) -> None:
Expand Down
33 changes: 1 addition & 32 deletions tests/handlers/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

from synapse.api.errors import NotFoundError, SynapseError
from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
from synapse.rest import admin
from synapse.rest.client import account, login
from synapse.server import HomeServer
from synapse.util import Clock

Expand All @@ -32,12 +30,6 @@


class DeviceTestCase(unittest.HomeserverTestCase):
servlets = [
login.register_servlets,
admin.register_servlets,
account.register_servlets,
]

def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
hs = self.setup_test_homeserver("server", federation_http_client=None)
handler = hs.get_device_handler()
Expand Down Expand Up @@ -123,7 +115,7 @@ def test_get_devices_by_user(self) -> None:
"device_id": "xyz",
"display_name": "display 0",
"last_seen_ip": None,
"last_seen_ts": 1000000,
"last_seen_ts": None,
},
device_map["xyz"],
)
Expand Down Expand Up @@ -237,29 +229,6 @@ def test_update_unknown_device(self) -> None:
NotFoundError,
)

def test_login_delete_old_devices(self) -> None:
"""Delete old devices if the user already has too many."""

user_id = self.register_user("user", "pass")

# Create a bunch of devices
for _ in range(50):
self.login("user", "pass")
self.reactor.advance(1)

# Advance the clock for ages (as we only delete old devices)
self.reactor.advance(60 * 60 * 24 * 300)

# Log in again to start the pruning
self.login("user", "pass")

# Give the background job time to do its thing
self.reactor.pump([1.0] * 100)

# We should now only have the most recent device.
devices = self.get_success(self.handler.get_devices_by_user(user_id))
self.assertEqual(len(devices), 1)

def _record_users(self) -> None:
# check this works for both devices which have a recorded client_ip,
# and those which don't.
Expand Down
4 changes: 1 addition & 3 deletions tests/storage/test_client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,6 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool) -> None:
)
)

last_seen = self.clock.time_msec()

if after_persisting:
# Trigger the storage loop
self.reactor.advance(10)
Expand All @@ -192,7 +190,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool) -> None:
"device_id": device_id,
"ip": None,
"user_agent": None,
"last_seen": last_seen,
"last_seen": None,
},
],
)
Expand Down