Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Quarantine media by ID or user ID #6681

Merged
merged 15 commits into from
Jan 13, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6681.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend the quarantine_media admin API to quarantine media by ID or all media by a specific user.
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
50 changes: 43 additions & 7 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,55 @@ It returns a JSON body like the following:
}
```

# Quarantine media in a room
# Quarantine media

This API 'quarantines' all the media in a room.
Quarantining media means that it is marked as inaccessible by users. It applies
to any local media, and any locally-cached copies of remote media.

The API is:
The media file itself (and any thumbnails) is not deleted from the server.

## Quarantining media by ID

This API quarantines a single piece of local or remote media.

```
POST /_synapse/admin/v1/quarantine_media/<room_id>
POST /_synapse/admin/v1/quarantine_media_by_id/<server_name>/<media_id>

{}
```

Quarantining media means that it is marked as inaccessible by users. It applies
to any local media, and any locally-cached copies of remote media.
Where `server_name` is in the form of `example.org`, and `media_id` is in the
form of `abcdefg12345...`.

The media file itself (and any thumbnails) is not deleted from the server.
## Quarantining media in a room

This API quarantines all local and remote media in a room.

```
POST /_synapse/admin/v1/quarantine_media_by_room/<room_id>

{}
```

It returns a JSON body like the following:

```
{
"num_quarantined": 10 # The number of media items successfully quarantined
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
}
```

Where `room_id` is in the form of `!roomid12345:example.org`.

Note that there is a legacy endpoint, `POST
/_synapse/admin/v1/quarantine_media/<room_id>`, that operates the same way.
However, it is deprecated and may be removed in a future release.

## Quarantining all media of a user

This API quarantines all *local* media that a *local* user has uploaded. That is to say, if
you would like to quarantine media uploaded by a user on a remote homeserver, you should
instead use one of the other APIs.

```
POST /_synapse/admin/v1/quarantine_media_by_user/<user_id>

{}
```

It returns a JSON body like the following:

```
{
"num_quarantined": 10 # The number of media items successfully quarantined
}
```

Where `user_id` is in the form of `@bob:example.org`.
68 changes: 66 additions & 2 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,85 @@ class QuarantineMediaInRoom(RestServlet):
this server.
"""

PATTERNS = historical_admin_path_patterns("/quarantine_media/(?P<room_id>[^/]+)")
PATTERNS = (
historical_admin_path_patterns("/quarantine_media_by_room/(?P<room_id>[^/]+)")
+
# This path kept around for legacy reasons
historical_admin_path_patterns("/quarantine_media/(?P<room_id>![^/]+)")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra ! snuck in here 🤦‍♂️

)

def __init__(self, hs):
self.store = hs.get_datastore()
self.auth = hs.get_auth()

async def on_POST(self, request, room_id):
async def on_POST(self, request, room_id: str):
requester = await self.auth.get_user_by_req(request)
await assert_user_is_admin(self.auth, requester.user)

logging.info("Quarantining room: %s", room_id)

# Quarantine all media in this room
num_quarantined = await self.store.quarantine_media_ids_in_room(
room_id, requester.user.to_string()
)

return 200, {"num_quarantined": num_quarantined}


class QuarantineMediaByUser(RestServlet):
    """Admin endpoint that quarantines all local media of a single user, so that
    it can no longer be downloaded via this server.
    """

    PATTERNS = historical_admin_path_patterns(
        "/quarantine_media_by_user/(?P<user_id>[^/]+)"
    )

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()

    async def on_POST(self, request, user_id: str):
        """Quarantine the media of `user_id` on behalf of the requesting admin.

        Args:
            request: The incoming request, used to identify the requester.
            user_id: The user whose media should be quarantined (the `user_id`
                group of PATTERNS).

        Returns:
            A `(200, {"num_quarantined": <count>})` tuple, where the count is
            the value handed back by the datastore.
        """
        requester = await self.auth.get_user_by_req(request)
        admin = requester.user
        await assert_user_is_admin(self.auth, admin)

        logging.info("Quarantining local media by user: %s", user_id)

        # The requesting admin is recorded as the one who quarantined the media
        quarantined_count = await self.store.quarantine_media_ids_by_user(
            user_id, admin.to_string()
        )

        return 200, {"num_quarantined": quarantined_count}


class QuarantineMediaByID(RestServlet):
    """Quarantines local or remote media by a given ID so that no one can download
    it via this server.
    """

    PATTERNS = historical_admin_path_patterns(
        "/quarantine_media_by_id/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)"
    )

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()

    async def on_POST(self, request, server_name: str, media_id: str):
        """Quarantine a single piece of media on behalf of the requesting admin.

        Args:
            request: The incoming request, used to identify the requester.
            server_name: The server the media originates from (the `server_name`
                group of PATTERNS).
            media_id: The ID of the media on that server (the `media_id` group
                of PATTERNS).

        Returns:
            A `(200, {})` tuple.
        """
        requester = await self.auth.get_user_by_req(request)
        await assert_user_is_admin(self.auth, requester.user)

        # The media may belong to any server, so don't describe it as "local"
        logging.info("Quarantining media by ID: %s/%s", server_name, media_id)

        # Quarantine this media id
        await self.store.quarantine_media_by_id(
            server_name, media_id, requester.user.to_string()
        )

        return 200, {}


class ListMediaInRoom(RestServlet):
"""Lists all of the media in a given room.
"""
Expand Down Expand Up @@ -94,4 +156,6 @@ def register_servlets_for_media_repo(hs, http_server):
"""
PurgeMediaCacheRestServlet(hs).register(http_server)
QuarantineMediaInRoom(hs).register(http_server)
QuarantineMediaByID(hs).register(http_server)
QuarantineMediaByUser(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
144 changes: 143 additions & 1 deletion synapse/storage/data_stores/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import re
from abc import abstractmethod
from typing import Optional, Tuple
from typing import List, Optional, Tuple

from six import integer_types

Expand Down Expand Up @@ -399,6 +399,8 @@ def quarantine_media_ids_in_room(self, room_id, quarantined_by):
the associated media
"""

logger.info("Quarantining media in room: %s", room_id)

def _quarantine_media_in_room_txn(txn):
local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
total_media_quarantined = 0
Expand Down Expand Up @@ -494,6 +496,118 @@ def _get_media_mxcs_in_room_txn(self, txn, room_id):

return local_media_mxcs, remote_media_mxcs

def quarantine_media_by_id(
    self, server_name: str, media_id: str, quarantined_by: str,
):
    """Quarantines a single local or remote media id

    Args:
        server_name: The name of the server that holds this media
        media_id: The ID of the media to be quarantined
        quarantined_by: The user ID that initiated the quarantine request

    Returns:
        The result of `self.db.runInteraction` for a transaction that yields
        the count reported by `_quarantine_media_txn`.
    """
    logger.info("Quarantining media: %s/%s", server_name, media_id)
    is_local = server_name == self.config.server_name

    def _quarantine_media_by_id_txn(txn):
        # Local media is identified by its media ID alone, remote media by an
        # (origin, media ID) pair; exactly one of the two lists is non-empty.
        local_mxcs = [media_id] if is_local else []
        remote_mxcs = [] if is_local else [(server_name, media_id)]

        return self._quarantine_media_txn(
            txn, local_mxcs, remote_mxcs, quarantined_by
        )

    # Use a description of its own so that this interaction can be told apart
    # from the one run by quarantine_media_ids_by_user.
    return self.db.runInteraction(
        "quarantine_media_by_id", _quarantine_media_by_id_txn
    )

def quarantine_media_ids_by_user(self, user_id: str, quarantined_by: str):
    """Quarantine every piece of local media associated with one user.

    Args:
        user_id: The ID of the user to quarantine media of
        quarantined_by: The ID of the user who made the quarantine request

    Returns:
        The result of `self.db.runInteraction` for a transaction that yields
        the count reported by `_quarantine_media_txn`.
    """

    def _quarantine_user_media_txn(txn):
        media_ids = self._get_media_ids_by_user_txn(txn, user_id)
        # Only local media is looked up for a user, so no remote media is passed
        no_remote_media = []
        return self._quarantine_media_txn(
            txn, media_ids, no_remote_media, quarantined_by
        )

    return self.db.runInteraction(
        "quarantine_media_by_user", _quarantine_user_media_txn
    )

def _get_media_ids_by_user_txn(self, txn, user_id: str, filter_quarantined=True):
    """Retrieves local media IDs by a given user

    Args:
        txn (cursor)
        user_id: The ID of the user to retrieve media IDs of
        filter_quarantined: If True (the default), only return media whose
            `quarantined_by` column is still NULL

    Returns:
        A list of the media IDs in `local_media_repository` whose `user_id`
        column matches the given user.
    """
    # Local media
    sql = """
        SELECT media_id
        FROM local_media_repository
        WHERE user_id = ?
    """
    if filter_quarantined:
        # Lead with a space so the clause is a separate token no matter how the
        # statement above happens to end.
        sql += " AND quarantined_by IS NULL"
    txn.execute(sql, (user_id,))

    # TODO: Figure out all remote media a user has referenced in a message

    return [row[0] for row in txn]

def _quarantine_media_txn(
    self,
    txn,
    local_mxcs: List[str],
    remote_mxcs: List[Tuple[str, str]],
    quarantined_by: str,
) -> int:
    """Quarantine local and remote media items

    Args:
        txn (cursor)
        local_mxcs: A list of local media IDs
        remote_mxcs: A list of (remote server, media id) tuples identifying
            remote media
        quarantined_by: The ID of the user who initiated the quarantine request

    Returns:
        The number of media items submitted for quarantine, i.e. the combined
        length of `local_mxcs` and `remote_mxcs`. This counts the items passed
        in, not the rows the UPDATE statements actually matched.
    """
    # Update all the tables to set the quarantined_by flag
    txn.executemany(
        """
        UPDATE local_media_repository
        SET quarantined_by = ?
        WHERE media_id = ?
        """,
        ((quarantined_by, media_id) for media_id in local_mxcs),
    )

    txn.executemany(
        """
        UPDATE remote_media_cache
        SET quarantined_by = ?
        WHERE media_origin = ? AND media_id = ?
        """,
        ((quarantined_by, origin, media_id) for origin, media_id in remote_mxcs),
    )

    return len(local_mxcs) + len(remote_mxcs)


class RoomBackgroundUpdateStore(SQLBaseStore):
REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
Expand Down Expand Up @@ -938,6 +1052,34 @@ def block_room(self, room_id, user_id):
(room_id,),
)

def get_media_mxcs_in_room(self, room_id):
    """Retrieves all the local and remote media MXC URIs in a given room

    Args:
        room_id (str)

    Returns:
        The result of `self.db.runInteraction` for a transaction that yields a
        `(local, remote)` pair of lists, each holding MXC URI strings of the
        form `mxc://<hostname>/<media_id>`.
    """

    def _get_media_mxcs_in_room_txn(txn):
        local_ids, remote_ids = self._get_media_mxcs_in_room_txn(txn, room_id)

        # Convert the IDs to MXC URIs. Local media IDs come without a hostname,
        # so they are qualified with this server's own.
        local_media_mxcs = [
            "mxc://%s/%s" % (self.hs.hostname, media_id) for media_id in local_ids
        ]
        remote_media_mxcs = [
            "mxc://%s/%s" % (hostname, media_id) for hostname, media_id in remote_ids
        ]

        return local_media_mxcs, remote_media_mxcs

    return self.db.runInteraction(
        "get_media_ids_in_room", _get_media_mxcs_in_room_txn
    )

@defer.inlineCallbacks
def get_rooms_for_retention_period_in_range(
self, min_ms, max_ms, include_null=False
Expand Down
Loading