Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix parsing of Content-Disposition headers #4763

Merged
merged 4 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/4763.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix parsing of Content-Disposition headers on remote media requests and URL previews.
72 changes: 52 additions & 20 deletions synapse/rest/media/v1/_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2019 New Vector Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -213,8 +214,7 @@ def get_filename_from_headers(headers):
Content-Disposition HTTP header.

Args:
headers (twisted.web.http_headers.Headers): The HTTP
request headers.
headers (dict[bytes, list[bytes]]): The HTTP request headers.

Returns:
A Unicode string of the filename, or None.
Expand All @@ -225,23 +225,12 @@ def get_filename_from_headers(headers):
if not content_disposition[0]:
return

# dict of unicode: bytes, corresponding to the key value sections of the
# Content-Disposition header.
params = {}
parts = content_disposition[0].split(b";")
for i in parts:
# Split into key-value pairs, if able
# We don't care about things like `inline`, so throw it out
if b"=" not in i:
continue

key, value = i.strip().split(b"=")
params[key.decode('ascii')] = value
_, params = _parse_header(content_disposition[0])

upload_name = None

# First check if there is a valid UTF-8 filename
upload_name_utf8 = params.get("filename*", None)
upload_name_utf8 = params.get(b"filename*", None)
if upload_name_utf8:
if upload_name_utf8.lower().startswith(b"utf-8''"):
upload_name_utf8 = upload_name_utf8[7:]
Expand All @@ -267,12 +256,55 @@ def get_filename_from_headers(headers):

# If there isn't check for an ascii name.
if not upload_name:
upload_name_ascii = params.get("filename", None)
upload_name_ascii = params.get(b"filename", None)
if upload_name_ascii and is_ascii(upload_name_ascii):
# Make sure there's no %-quoted bytes. If there is, reject it as
# non-valid ASCII.
if b"%" not in upload_name_ascii:
upload_name = upload_name_ascii.decode('ascii')
upload_name = upload_name_ascii.decode('ascii')

# This may be None here, indicating we did not find a matching name.
return upload_name


def _parse_header(line):
    """Parse a Content-type like header.

    Cargo-culted from `cgi`, but works on bytes rather than strings.

    Args:
        line (bytes): header value to be parsed

    Returns:
        Tuple[bytes, dict[bytes, bytes]]:
            The first `;`-separated segment of the header (e.g. b"inline"),
            followed by a dict of the `name=value` parameters found in the
            remaining segments. Parameter names are lower-cased; values have
            one pair of surrounding double-quotes removed and the `\\\\` and
            `\\"` escapes undone. Segments without an `=` are ignored.
    """
    # Prefixing with `;` guarantees that _parseparam yields at least one
    # segment, so the next() call below always has something to return.
    parts = _parseparam(b';' + line)
    key = next(parts)
    pdict = {}
    for p in parts:
        i = p.find(b'=')
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1:].strip()

            # strip double-quotes
            if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
                value = value[1:-1]
                value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
            pdict[name] = value

    return key, pdict


def _parseparam(s):
"""Generator which splits the input on ;, respecting double-quoted sequences

Cargo-culted from `cgi`, but works on bytes rather than strings.
"""
while s[:1] == b';':
s = s[1:]

# look for the next ;
end = s.find(b';')

# if there is an odd number of " marks between here and the next ;, skip to the
# next ; instead
while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
end = s.find(b';', end + 1)

if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]
45 changes: 45 additions & 0 deletions tests/rest/media/v1/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# Copyright 2019 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.rest.media.v1._base import get_filename_from_headers

from tests import unittest


class GetFileNameFromHeadersTests(unittest.TestCase):
    """Table-driven checks of get_filename_from_headers.

    Each key of TEST_CASES is sent as the sole Content-Disposition header
    value, and the returned filename is compared with the mapped value.
    """

    # input -> expected result
    TEST_CASES = {
        b"inline; filename=abc.txt": u"abc.txt",
        b'inline; filename="azerty"': u"azerty",
        b'inline; filename="aze%20rty"': u"aze%20rty",
        # The backslash is doubled so that it survives Python's own literal
        # escaping and the header really contains an escaped quote (`\"`).
        b'inline; filename="aze\\"rty"': u'aze"rty',
        b'inline; filename="azer;ty"': u"azer;ty",

        b"inline; filename*=utf-8''foo%C2%A3bar": u"foo£bar",
    }

    def tests(self):
        """Run every TEST_CASES entry through get_filename_from_headers."""
        for hdr, expected in self.TEST_CASES.items():
            res = get_filename_from_headers(
                {
                    b'Content-Disposition': [hdr],
                },
            )
            self.assertEqual(
                res, expected,
                "expected output for %s to be %s but was %s" % (
                    hdr, expected, res,
                )
            )