Skip to content

Commit

Permalink
Merge pull request #9773 from Rob--W/content-disposition-textdecode-utf8
Browse files Browse the repository at this point in the history
Fix multibyte decoding in content_disposition.js
  • Loading branch information
timvandermeij authored Jun 3, 2018
2 parents 2921cc0 + 0e4e791 commit 3859c63
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions src/display/content_disposition.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,27 @@ function getFilenameFromContentDispositionHeader(contentDisposition) {
}
function textdecode(encoding, value) {
if (encoding) {
if (!/^[^\x00-\xFF]+$/.test(value)) {
if (!/^[\x00-\xFF]+$/.test(value)) {
return value;
}
try {
let decoder = new TextDecoder(encoding, { fatal: true, });
let bytes = new Array(value.length);
for (let i = 0; i < value.length; ++i) {
bytes[i] = value.charCodeAt(0);
bytes[i] = value.charCodeAt(i);
}
value = decoder.decode(new Uint8Array(bytes));
needsEncodingFixup = false;
} catch (e) {
// TextDecoder constructor threw - unrecognized encoding.
// Or TextDecoder API is not available.
// Or TextDecoder API is not available (in IE / Edge).
if (/^utf-?8$/i.test(encoding)) {
// UTF-8 is commonly used, try to support it in another way:
value = decodeURIComponent(escape(value));
needsEncodingFixup = false;
try {
value = decodeURIComponent(escape(value));
needsEncodingFixup = false;
} catch (err) {
}
}
}
}
Expand All @@ -104,7 +107,11 @@ function getFilenameFromContentDispositionHeader(contentDisposition) {
/**
 * Tries to repair the character encoding of `value`.
 *
 * Decoding is only attempted when the enclosing-scope flag
 * `needsEncodingFixup` is truthy and `value` contains at least one character
 * in the \x80-\xff range; otherwise `value` is returned untouched.
 *
 * NOTE(review): this block is displayed as a diff hunk ("-104,7 +107,11")
 * whose +/- markers were lost, so the pre-change statement and its
 * replacement both appear in the body. Read literally, the first `return`
 * makes the statements after it unreachable.
 *
 * @param {string} value - Text to inspect (presumably a string; it is passed
 *   to RegExp.prototype.test and to textdecode).
 * @returns {string} The result of textdecode, or `value` unchanged when no
 *   fixup is attempted.
 */
function fixupEncoding(value) {
if (needsEncodingFixup && /[\x80-\xff]/.test(value)) {
// Maybe multi-byte UTF-8.
// NOTE(review): per the hunk line counts this appears to be the pre-change
// statement - it returned the UTF-8 attempt directly, with no fallback.
return textdecode('utf-8', value);
// Post-change version: keep the UTF-8 attempt in `value` instead of
// returning immediately, so a second attempt can follow.
value = textdecode('utf-8', value);
// textdecode sets needsEncodingFixup to false after a successful decode, so
// a flag that is still set here means the UTF-8 attempt did not succeed.
if (needsEncodingFixup) {
// Try iso-8859-1 encoding.
value = textdecode('iso-8859-1', value);
}
}
return value;
}
Expand Down Expand Up @@ -206,10 +213,10 @@ function getFilenameFromContentDispositionHeader(contentDisposition) {
return textdecode(charset, text);
} // else encoding is b or B - base64 (RFC 2047 section 4.1)
try {
return atob(text);
text = atob(text);
} catch (e) {
return text;
}
return textdecode(charset, text);
});
}

Expand Down

0 comments on commit 3859c63

Please sign in to comment.