From b3b3cafdadfefacf56fc044be19bc7390852c53f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Oct 2022 09:58:17 +0100 Subject: [PATCH 1/9] POC delete stale non-e2e devices for users --- synapse/handlers/device.py | 12 ++++++- synapse/storage/databases/main/devices.py | 38 ++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index f9cc5bddbcbd..74f550807470 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -402,6 +402,8 @@ async def check_device_registered( self._check_device_name_length(initial_device_display_name) + await self._delete_stale_devices(user_id) + if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -433,6 +435,14 @@ async def check_device_registered( raise errors.StoreError(500, "Couldn't generate a device ID.") + async def _delete_stale_devices(self, user_id: str) -> None: + """Delete any stale devices this user may have.""" + device_ids = await self.store.get_stale_devices(user_id) + if not device_ids: + return + + await self.delete_devices(user_id, device_ids) + async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -462,7 +472,7 @@ async def delete_all_devices_for_user( device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 18358eca467e..54d97e673a70 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1466,6 +1466,40 @@ def _txn(txn: LoggingTransaction) -> int: return rows + async def get_stale_devices(self, user_id: str) -> Collection[str]: + """Get set of devices that we could delete for this user.""" + + num_devices = await self.db_pool.simple_select_one_onecol( + table="devices", + keyvalues={"user_id": user_id, "hidden": "false"}, + retcol="COALESCE(COUNT(*), 0)", + desc="count_devices", + ) + + if num_devices > 10: + return () + + max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + + sql = """ + SELECT DISTINCT device_id FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND not hidden + AND last_seen < ? + AND key_json IS NULL + """ + + def get_stale_devices_txn(txn: LoggingTransaction) -> Collection[str]: + txn.execute(sql, (user_id, max_last_seen)) + return {device_id for device_id, in txn} + + return await self.db_pool.runInteraction( + "get_stale_devices", + get_stale_devices_txn, + ) + class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): def __init__( @@ -1517,7 +1551,9 @@ async def store_device( "user_id": user_id, "device_id": device_id, }, - values={}, + values={ + "last_seen": self._clock.time_msec(), + }, insertion_values={ "display_name": initial_device_display_name, "hidden": False, From 93451da663f056c2894b73557cdb7c91e3aebe7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Oct 2022 12:52:53 +0100 Subject: [PATCH 2/9] Update synapse/storage/databases/main/devices.py Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 54d97e673a70..5185abf223bd 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1471,7 +1471,7 @@ async def get_stale_devices(self, user_id: str) -> Collection[str]: num_devices = await self.db_pool.simple_select_one_onecol( table="devices", - keyvalues={"user_id": user_id, "hidden": "false"}, + keyvalues={"user_id": user_id, "hidden": False}, retcol="COALESCE(COUNT(*), 0)", desc="count_devices", ) From 748858acba3c38a8dfa2b29a048ec11f53c5c112 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Oct 2022 13:39:44 +0100 Subject: [PATCH 3/9] Fix equality --- synapse/storage/databases/main/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5185abf223bd..693dd7650ed8 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1476,7 +1476,7 @@ async def get_stale_devices(self, user_id: str) -> Collection[str]: desc="count_devices", ) - if num_devices > 10: + if num_devices < 10: return () max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 From 8f431b2356209be28fb91875090d652baeffad60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Oct 2022 13:40:03 +0100 Subject: [PATCH 4/9] Fix case --- synapse/storage/databases/main/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 693dd7650ed8..ac04ab052c63 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1486,7 +1486,7 @@ async def get_stale_devices(self, user_id: str) -> Collection[str]: LEFT JOIN e2e_device_keys_json USING (user_id, device_id) WHERE user_id = ? - AND not hidden + AND NOT hidden AND last_seen < ? AND key_json IS NULL """ From 7c5658128ef36b0c74c52136881d19b3ccb0fea0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Nov 2022 10:45:24 +0000 Subject: [PATCH 5/9] Fix ups and improvements --- synapse/handlers/device.py | 7 ++-- synapse/storage/databases/main/devices.py | 42 +++++++++++++++++++---- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 64d6ec1bb6ba..eebf889575e6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -421,7 +421,8 @@ async def check_device_registered( self._check_device_name_length(initial_device_display_name) - await self._delete_stale_devices(user_id) + # Prune the user's device list if they already have a lot of devices. + await self._prune_too_many_devices(user_id) if device_id is not None: new_device = await self.store.store_device( @@ -454,9 +455,9 @@ async def check_device_registered( raise errors.StoreError(500, "Couldn't generate a device ID.") - async def _delete_stale_devices(self, user_id: str) -> None: + async def _prune_too_many_devices(self, user_id: str) -> None: """Delete any stale devices this user may have.""" - device_ids = await self.store.get_stale_devices(user_id) + device_ids = await self.store.check_too_many_devices_for_user(user_id) if not device_ids: return diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 24161415c83c..3b5efc252b45 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1526,8 +1526,12 @@ def _txn(txn: LoggingTransaction) -> int: return rows - async def get_stale_devices(self, user_id: str) -> Collection[str]: - """Get set of devices that we could delete for this user.""" + async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]: + """Check if the user has a lot of devices, and if so return the set of + devices we can prune. + + This does *not* return hidden devices or devices with E2E keys. + """ num_devices = await self.db_pool.simple_select_one_onecol( table="devices", @@ -1536,11 +1540,35 @@ async def get_stale_devices(self, user_id: str) -> Collection[str]: desc="count_devices", ) + # We let users have up to ten devices without pruning. if num_devices < 10: return () + # We prune everything older than N days. max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + if num_devices > 50: + # If the user has more than 50 devices, then we chose a last seen + # that ensures we keep at most 50 devices. + sql = """ + SELECT last_seen FROM devices + WHERE + user_id = ? + AND NOT hidden + AND last_seen IS NOT NULL + AND key_json IS NULL + ORDER BY last_seen DESC + LIMIT 1 + OFFSET 50 + """ + + rows = await self.db_pool.execute( + "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) + ) + if rows: + max_last_seen = max(rows[0][0], max_last_seen) + + # Now fetch the devices to delete. sql = """ SELECT DISTINCT device_id FROM devices LEFT JOIN e2e_device_keys_json USING (user_id, device_id) @@ -1551,13 +1579,15 @@ async def get_stale_devices(self, user_id: str) -> Collection[str]: AND key_json IS NULL """ - def get_stale_devices_txn(txn: LoggingTransaction) -> Collection[str]: + def check_too_many_devices_for_user_txn( + txn: LoggingTransaction, + ) -> Collection[str]: txn.execute(sql, (user_id, max_last_seen)) return {device_id for device_id, in txn} return await self.db_pool.runInteraction( - "get_stale_devices", - get_stale_devices_txn, + "check_too_many_devices_for_user", + check_too_many_devices_for_user_txn, ) @@ -1665,7 +1695,7 @@ async def store_device( ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Deletes several devices. Args: From 2c83e769348d3d42135c87e28ccdbce1f111b706 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Nov 2022 10:47:53 +0000 Subject: [PATCH 6/9] Newsfile --- changelog.d/14038.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14038.misc diff --git a/changelog.d/14038.misc b/changelog.d/14038.misc new file mode 100644 index 000000000000..f9bfc581ad53 --- /dev/null +++ b/changelog.d/14038.misc @@ -0,0 +1 @@ +Prune user's old devices on login if they have too many. From 2f65c20c1b699885b9dc3e6eb1884c1d1efe5fab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Nov 2022 11:34:35 +0000 Subject: [PATCH 7/9] Fix unit tests --- tests/storage/test_client_ips.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 49ad3c132425..a9af1babedf4 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -169,6 +169,8 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool): ) ) + last_seen = self.clock.time_msec() + if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -189,7 +191,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": None, + "last_seen": last_seen, }, ], ) From b2906ebdfab8e1f7ab34da766ad6a49c42969595 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Nov 2022 12:19:09 +0000 Subject: [PATCH 8/9] Actually fix unit tests for real --- synapse/storage/databases/main/devices.py | 5 ++--- tests/handlers/test_device.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 3b5efc252b45..d6e6a68b4161 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1645,11 +1645,10 @@ async def store_device( "user_id": user_id, "device_id": device_id, }, - values={ - "last_seen": self._clock.time_msec(), - }, + values={}, insertion_values={ "display_name": initial_device_display_name, + "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index ce7525e29c0a..a456bffd632f 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ def test_get_devices_by_user(self) -> None: "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": None, + "last_seen_ts": 1000000, }, device_map["xyz"], ) From f5927d7a9c6caa2e7428b53900c54ae1bab6da92 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Nov 2022 10:00:46 +0000 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- synapse/handlers/device.py | 2 +- synapse/storage/databases/main/devices.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index eebf889575e6..7c4dd8cf5a3d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -456,7 +456,7 @@ async def check_device_registered( raise errors.StoreError(500, "Couldn't generate a device ID.") async def _prune_too_many_devices(self, user_id: str) -> None: - """Delete any stale devices this user may have.""" + """Delete any excess old devices this user may have.""" device_ids = await self.store.check_too_many_devices_for_user(user_id) if not device_ids: return diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index d6e6a68b4161..1d7f5b15d1af 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1541,7 +1541,7 @@ async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str] ) # We let users have up to ten devices without pruning. - if num_devices < 10: + if num_devices <= 10: return () # We prune everything older than N days.