Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Implement a content type allow list for URL previews #11936

Merged
merged 12 commits into from
Feb 10, 2022
1 change: 1 addition & 0 deletions changelog.d/11936.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Don't attempt URL previews for URLs with audio/* or video/* content types. This prevents Synapse from making useless longer-lived connections to streaming media servers.
dkasak marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 14 additions & 0 deletions synapse/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,14 @@ async def get_file(
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
)

if b"Content-Type" in resp_headers:
clokep marked this conversation as resolved.
Show resolved Hide resolved
content_type = resp_headers[b"Content-Type"][0]
if _is_av_media(content_type):
raise SynapseError(
HTTPStatus.BAD_GATEWAY,
("Unsupported content type for URL previews: %r" % content_type),
dkasak marked this conversation as resolved.
Show resolved Hide resolved
)

# TODO: if our Content-Type is HTML or something, just read the first
# N bytes into RAM rather than saving it all to disk only to read it
# straight back in again
Expand Down Expand Up @@ -762,6 +770,12 @@ async def get_file(
)


def _is_av_media(content_type: bytes) -> bool:
    """Report whether a Content-Type header value names audio or video media.

    The comparison is case-insensitive and only inspects the start of the
    value, so anything following the media type (such as ``; codecs=...``
    parameters) has no effect on the result.

    Args:
        content_type: The raw Content-Type header value, as bytes.

    Returns:
        True if the value begins with ``video/`` or ``audio/``, False
        otherwise (including for an empty value).
    """
    # Lower-case once and test both prefixes in a single startswith() call.
    return content_type.lower().startswith((b"video/", b"audio/"))


def _timeout_to_request_timed_out_error(f: Failure):
if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
# The TCP connection has its own timeout (set by the 'connectTimeout' param
Expand Down
72 changes: 72 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,78 @@ def test_non_ascii_preview_httpequiv(self):
self.assertEqual(channel.code, 200)
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")

def test_video_rejected(self):
    """A URL whose response has a video/* Content-Type yields a 502 error."""
    payload = b"anything"
    self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

    # Kick off the preview request without waiting for it to complete, so
    # that the outbound HTTP response can be fed in by hand below.
    channel = self.make_request(
        "GET",
        "preview_url?url=http://matrix.org",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # Connect the outbound client protocol to an in-memory peer.
    client = self.reactor.tcpClients[0][2].buildProtocol(None)
    server = AccumulatingProtocol()
    server.makeConnection(FakeTransport(client, self.reactor))
    client.makeConnection(FakeTransport(server, self.reactor))

    # Response head advertising a video content type, followed by the body.
    response_head = (
        b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
        b"Content-Type: video/mp4\r\n\r\n"
    ) % (len(payload),)
    client.dataReceived(response_head + payload)

    self.pump()

    expected_error = {
        "errcode": "M_UNKNOWN",
        "error": "Unsupported content type for URL previews: b'video/mp4'",
    }
    self.assertEqual(channel.code, 502)
    self.assertEqual(channel.json_body, expected_error)

def test_audio_rejected(self):
    """A URL whose response has an audio/* Content-Type yields a 502 error."""
    payload = b"anything"
    self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

    # Kick off the preview request without waiting for it to complete, so
    # that the outbound HTTP response can be fed in by hand below.
    channel = self.make_request(
        "GET",
        "preview_url?url=http://matrix.org",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # Connect the outbound client protocol to an in-memory peer.
    client = self.reactor.tcpClients[0][2].buildProtocol(None)
    server = AccumulatingProtocol()
    server.makeConnection(FakeTransport(client, self.reactor))
    client.makeConnection(FakeTransport(server, self.reactor))

    # Response head advertising an audio content type, followed by the body.
    response_head = (
        b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
        b"Content-Type: audio/aac\r\n\r\n"
    ) % (len(payload),)
    client.dataReceived(response_head + payload)

    self.pump()

    expected_error = {
        "errcode": "M_UNKNOWN",
        "error": "Unsupported content type for URL previews: b'audio/aac'",
    }
    self.assertEqual(channel.code, 502)
    self.assertEqual(channel.json_body, expected_error)

def test_non_ascii_preview_content_type(self):
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

Expand Down