This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix Content-Disposition in media repository #4176
Merged
Merged
Changes from 11 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
b65fe27
fixes
hawkowl ac1149d
some initial testing
hawkowl a387992
fix
hawkowl c15ed71
fix
hawkowl 37ab57a
remove code duplication and fix the preview API suffering a similar bug
hawkowl bc52fe5
fix on py2
hawkowl 8007cdf
fix packaging
hawkowl 4d42256
Update synapse/rest/media/v1/_base.py
richvdh 446117d
fixes
hawkowl 624b3c9
Merge branch 'hawkowl/py3-content-disposition' of github.com:matrix-o…
hawkowl de00bd5
Merge remote-tracking branch 'origin/develop' into hawkowl/py3-conten…
hawkowl 92415bc
fix comments
hawkowl File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
The media repository now no longer fails to decode UTF-8 filenames when downloading remote media. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -16,6 +16,7 @@ | |||||
import logging | ||||||
import os | ||||||
|
||||||
from six import PY3 | ||||||
from six.moves import urllib | ||||||
|
||||||
from twisted.internet import defer | ||||||
|
@@ -48,26 +49,21 @@ def parse_media_id(request): | |||||
return server_name, media_id, file_name | ||||||
except Exception: | ||||||
raise SynapseError( | ||||||
404, | ||||||
"Invalid media id token %r" % (request.postpath,), | ||||||
Codes.UNKNOWN, | ||||||
404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN | ||||||
) | ||||||
|
||||||
|
||||||
def respond_404(request): | ||||||
respond_with_json( | ||||||
request, 404, | ||||||
cs_error( | ||||||
"Not found %r" % (request.postpath,), | ||||||
code=Codes.NOT_FOUND, | ||||||
), | ||||||
send_cors=True | ||||||
request, | ||||||
404, | ||||||
cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), | ||||||
send_cors=True, | ||||||
) | ||||||
|
||||||
|
||||||
@defer.inlineCallbacks | ||||||
def respond_with_file(request, media_type, file_path, | ||||||
file_size=None, upload_name=None): | ||||||
def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): | ||||||
logger.debug("Responding with %r", file_path) | ||||||
|
||||||
if os.path.isfile(file_path): | ||||||
|
@@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): | |||||
file_size (int): Size in bytes of the media, if known. | ||||||
upload_name (str): The name of the requested file, if any. | ||||||
""" | ||||||
|
||||||
def _quote(x): | ||||||
return urllib.parse.quote(x.encode("utf-8")) | ||||||
|
||||||
request.setHeader(b"Content-Type", media_type.encode("UTF-8")) | ||||||
if upload_name: | ||||||
if is_ascii(upload_name): | ||||||
disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") | ||||||
disposition = "inline; filename=%s" % (_quote(upload_name),) | ||||||
else: | ||||||
disposition = ( | ||||||
"inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") | ||||||
disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) | ||||||
|
||||||
request.setHeader(b"Content-Disposition", disposition) | ||||||
request.setHeader(b"Content-Disposition", disposition.encode('ascii')) | ||||||
|
||||||
# cache for at least a day. | ||||||
# XXX: we might want to turn this off for data we don't want to | ||||||
# recommend caching as it's sensitive or private - or at least | ||||||
# select private. don't bother setting Expires as all our | ||||||
# clients are smart enough to be happy with Cache-Control | ||||||
request.setHeader( | ||||||
b"Cache-Control", b"public,max-age=86400,s-maxage=86400" | ||||||
) | ||||||
|
||||||
request.setHeader( | ||||||
b"Content-Length", b"%d" % (file_size,) | ||||||
) | ||||||
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") | ||||||
request.setHeader(b"Content-Length", b"%d" % (file_size,)) | ||||||
|
||||||
|
||||||
@defer.inlineCallbacks | ||||||
|
@@ -153,6 +144,7 @@ class Responder(object): | |||||
Responder is a context manager which *must* be used, so that any resources | ||||||
held can be cleaned up. | ||||||
""" | ||||||
|
||||||
def write_to_consumer(self, consumer): | ||||||
"""Stream response into consumer | ||||||
|
||||||
|
@@ -186,9 +178,18 @@ class FileInfo(object): | |||||
thumbnail_method (str) | ||||||
thumbnail_type (str): Content type of thumbnail, e.g. image/png | ||||||
""" | ||||||
def __init__(self, server_name, file_id, url_cache=False, | ||||||
thumbnail=False, thumbnail_width=None, thumbnail_height=None, | ||||||
thumbnail_method=None, thumbnail_type=None): | ||||||
|
||||||
def __init__( | ||||||
self, | ||||||
server_name, | ||||||
file_id, | ||||||
url_cache=False, | ||||||
thumbnail=False, | ||||||
thumbnail_width=None, | ||||||
thumbnail_height=None, | ||||||
thumbnail_method=None, | ||||||
thumbnail_type=None, | ||||||
): | ||||||
self.server_name = server_name | ||||||
self.file_id = file_id | ||||||
self.url_cache = url_cache | ||||||
|
@@ -197,3 +198,71 @@ def __init__(self, server_name, file_id, url_cache=False, | |||||
self.thumbnail_height = thumbnail_height | ||||||
self.thumbnail_method = thumbnail_method | ||||||
self.thumbnail_type = thumbnail_type | ||||||
|
||||||
|
||||||
def get_filename_from_headers(headers): | ||||||
""" | ||||||
Get the filename of the downloaded file by inspecting the | ||||||
Content-Disposition HTTP header. | ||||||
|
||||||
Args: | ||||||
headers (twisted.web.http_headers.Headers): The HTTP | ||||||
request headers. | ||||||
|
||||||
Returns: | ||||||
A Unicode string of the filename, or None. | ||||||
""" | ||||||
content_disposition = headers.get(b"Content-Disposition", [b'']) | ||||||
|
||||||
# No header, bail out. | ||||||
if not content_disposition[0]: | ||||||
return | ||||||
|
||||||
params = {} | ||||||
parts = content_disposition[0].split(b";") | ||||||
for i in parts: | ||||||
# Split into key-value pairs, if able | ||||||
if b"=" not in i: | ||||||
continue | ||||||
|
||||||
key, value = i.strip().split(b"=") | ||||||
# Store it with a decoded key and unencoded value | ||||||
params[key.decode('ascii')] = value | ||||||
|
||||||
upload_name = None | ||||||
|
||||||
# First check if there is a valid UTF-8 filename | ||||||
upload_name_utf8 = params.get("filename*", None) | ||||||
if upload_name_utf8: | ||||||
if upload_name_utf8.lower().startswith(b"utf-8''"): | ||||||
upload_name_utf8 = upload_name_utf8[7:] | ||||||
if PY3: | ||||||
try: | ||||||
# We have a filename*= section. This MUST be ASCII, and any | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Half of this comment seems to apply to PY2 as well as PY3, so could it be pulled up? Also "quoted" doesn't mean much to me, even if that's what urllib calls it. Can we call it "%-encoded" or "%-escaped" or something? |
||||||
# UTF-8 bytes are quoted. Once it is decoded, we can then | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "decoded" is unclear to me. Can we say something like:
|
||||||
# unquote it strictly. | ||||||
upload_name = urllib.parse.unquote( | ||||||
upload_name_utf8.decode('ascii'), errors="strict" | ||||||
) | ||||||
except UnicodeDecodeError: | ||||||
# Incorrect UTF-8. | ||||||
pass | ||||||
else: | ||||||
# On Python 2, we can unquote it directly, and then decode it | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
# strictly. | ||||||
try: | ||||||
upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') | ||||||
except UnicodeDecodeError: | ||||||
pass | ||||||
|
||||||
# If there isn't check for an ascii name. | ||||||
if not upload_name: | ||||||
upload_name_ascii = params.get("filename", None) | ||||||
if upload_name_ascii and is_ascii(upload_name_ascii): | ||||||
# Make sure there's no percent-escaped bytes. If there is, reject it | ||||||
# as non-valid ASCII. | ||||||
if b"%" not in upload_name_ascii: | ||||||
upload_name = upload_name_ascii.decode('ascii') | ||||||
|
||||||
# This may be None here, indicating we did not find a matching name. | ||||||
return upload_name |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd find a comment which documents the type of this useful. It seems to be a map from a `unicode` to a `bytes`? (Edit: I see that you kinda document this below, but I'd find it clearer here. Also the terms "decoded" and "unencoded" are pretty overloaded and unclear here.)