Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add an admin route to get all the media in a room #2818

Merged
merged 6 commits into from
Feb 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# List all media in a room

This API gets a list of known media in a room.

The API is:
```
GET /_matrix/client/r0/admin/room/<room_id>/media
```
including an `access_token` of a server admin.

It returns a JSON body like the following:
```
{
    "local": [
        "mxc://localhost/xwvutsrqponmlkjihgfedcba",
        "mxc://localhost/abcdefghijklmnopqrstuvwx"
    ],
    "remote": [
        "mxc://matrix.org/xwvutsrqponmlkjihgfedcba",
        "mxc://matrix.org/abcdefghijklmnopqrstuvwx"
    ]
}
```
22 changes: 22 additions & 0 deletions synapse/rest/client/v1/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,27 @@ def on_POST(self, request, room_id):
defer.returnValue((200, {"num_quarantined": num_quarantined}))


class ListMediaInRoom(ClientV1RestServlet):
    """Admin servlet that reports the media referenced by a room.

    Handles GET requests on ``/admin/room/<room_id>/media`` (as expanded by
    ``client_path_patterns``) and answers with a JSON object holding two
    entries, ``local`` and ``remote``, taken from the datastore's
    ``get_media_mxcs_in_room``.
    """
    PATTERNS = client_path_patterns("/admin/room/(?P<room_id>[^/]+)/media")

    def __init__(self, hs):
        super(ListMediaInRoom, self).__init__(hs)
        self.store = hs.get_datastore()

    @defer.inlineCallbacks
    def on_GET(self, request, room_id):
        """Return the media for ``room_id``; only server admins may call this.

        Raises:
            AuthError: (403) if the requesting user is not a server admin.
        """
        requester = yield self.auth.get_user_by_req(request)
        caller_is_admin = yield self.auth.is_server_admin(requester.user)
        if not caller_is_admin:
            raise AuthError(403, "You are not a server admin")

        media = yield self.store.get_media_mxcs_in_room(room_id)
        local_media, remote_media = media

        response_body = {"local": local_media, "remote": remote_media}
        defer.returnValue((200, response_body))


class ResetPasswordRestServlet(ClientV1RestServlet):
"""Post request to allow an administrator reset password for a user.
This needs user to have administrator access in Synapse.
Expand Down Expand Up @@ -487,3 +508,4 @@ def register_servlets(hs, http_server):
SearchUsersRestServlet(hs).register(http_server)
ShutdownRoomRestServlet(hs).register(http_server)
QuarantineMediaInRoom(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
153 changes: 97 additions & 56 deletions synapse/storage/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,73 +533,114 @@ def block_room(self, room_id, user_id):
)
self.is_room_blocked.invalidate((room_id,))

def get_media_mxcs_in_room(self, room_id):
    """Retrieves all the local and remote media MXC URIs in a given room

    Args:
        room_id (str)

    Returns:
        The result of ``self.runInteraction`` (presumably a Deferred, since
        callers yield on it) for a transaction that produces a 2-tuple
        ``(local, remote)``. Both elements are lists of MXC URI strings of
        the form ``mxc://<hostname>/<media_id>``; local URIs use
        ``self.hostname`` as the hostname.
    """
    def _get_media_mxcs_in_room_txn(txn):
        local_ids, remote_ids = self._get_media_mxcs_in_room_txn(txn, room_id)

        # Convert the IDs to MXC URIs. Local media IDs carry no hostname of
        # their own, so this server's hostname is filled in.
        local_media_mxcs = [
            "mxc://%s/%s" % (self.hostname, media_id)
            for media_id in local_ids
        ]
        remote_media_mxcs = [
            "mxc://%s/%s" % (hostname, media_id)
            for hostname, media_id in remote_ids
        ]

        return local_media_mxcs, remote_media_mxcs

    return self.runInteraction("get_media_ids_in_room", _get_media_mxcs_in_room_txn)

def quarantine_media_ids_in_room(self, room_id, quarantined_by):
    """For a room loops through all events with media and quarantines
    the associated media

    Args:
        room_id (str)
        quarantined_by: value written to the ``quarantined_by`` column of
            the media tables (presumably the ID of the admin user -- confirm
            against the caller).

    Returns:
        The result of ``self.runInteraction`` for a transaction that
        produces an int: the number of media references found in the room
        (local plus remote). This counts the references that were submitted
        for update, not the number of database rows actually changed.
    """
    def _quarantine_media_in_room_txn(txn):
        local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)

        # Now update all the tables to set the quarantined_by flag

        txn.executemany("""
            UPDATE local_media_repository
            SET quarantined_by = ?
            WHERE media_id = ?
        """, ((quarantined_by, media_id) for media_id in local_mxcs))

        txn.executemany(
            """
                UPDATE remote_media_cache
                SET quarantined_by = ?
                WHERE media_origin = ? AND media_id = ?
            """,
            (
                (quarantined_by, origin, media_id)
                for origin, media_id in remote_mxcs
            )
        )

        return len(local_mxcs) + len(remote_mxcs)

    return self.runInteraction(
        "quarantine_media_in_room",
        _quarantine_media_in_room_txn,
    )

def _get_media_mxcs_in_room_txn(self, txn, room_id):
    """Retrieves the IDs of all the local and remote media referenced by
    events in a given room

    Scans the room's non-outlier events flagged ``contains_url``, newest
    first in batches of 100, and collects the ``mxc://`` URIs found in each
    event's ``url`` and ``info.thumbnail_url`` content fields.

    Args:
        txn (cursor)
        room_id (str)

    Returns:
        A 2-tuple ``(local, remote)``:
            * ``local`` is a list of media IDs (str) whose MXC hostname
              equals ``self.hostname``.
            * ``remote`` is a list of ``(hostname, media_id)`` tuples for
              media on any other hostname.
        Entries appear in scan order and are not de-duplicated.
    """
    mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")

    next_token = self.get_current_events_token() + 1
    local_media_mxcs = []
    remote_media_mxcs = []

    sql = """
        SELECT stream_ordering, content FROM events
        WHERE room_id = ?
        AND stream_ordering < ?
        AND contains_url = ? AND outlier = ?
        ORDER BY stream_ordering DESC
        LIMIT ?
    """

    # Compare against None rather than relying on truthiness: a batch that
    # happens to end on stream_ordering 0 must not terminate the scan while
    # lower (negative) orderings may still exist.
    while next_token is not None:
        txn.execute(sql, (room_id, next_token, True, False, 100))

        # Stays None if the query returned no rows, which ends the loop.
        next_token = None
        for stream_ordering, content_json in txn:
            next_token = stream_ordering
            content = json.loads(content_json)

            # Event content is not validated here, so "info" may be
            # something other than an object; treat that as "no thumbnail".
            info = content.get("info")
            if not isinstance(info, dict):
                info = {}

            for url in (content.get("url"), info.get("thumbnail_url")):
                if not url:
                    continue
                try:
                    matches = mxc_re.match(url)
                except TypeError:
                    # Non-string value (e.g. a number or list): not a URL.
                    continue
                if not matches:
                    continue

                hostname = matches.group(1)
                media_id = matches.group(2)
                if hostname == self.hostname:
                    local_media_mxcs.append(media_id)
                else:
                    remote_media_mxcs.append((hostname, media_id))

    return local_media_mxcs, remote_media_mxcs