From 968a84692541dcb525b84594b4ac6eb6a837ea1d Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 1 Jun 2023 22:05:18 -0700 Subject: [PATCH] fix: some special characters in URLs were not being handled correctly --- integration.ts | 38 ++++++++++++++++++++++++++++---------- signing.test.ts | 13 +++++++++++++ signing.ts | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- xml-parser.ts | 9 +++++++-- 4 files changed, 96 insertions(+), 13 deletions(-) diff --git a/integration.ts b/integration.ts index a3539c3..39206d2 100644 --- a/integration.ts +++ b/integration.ts @@ -194,16 +194,34 @@ Deno.test({ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // non-ascii characters in URLs -Deno.test({ - name: "getObject()/putObject work with non-ASCII characters in URLs", - fn: async () => { - const path = "файл/gemütlich.txt"; - const contents = `This is the contents of the file called '${path}'.`; - await client.putObject(path, contents); - const response = await client.getObject(path); - assertEquals(await response.text(), contents); - }, -}); +for ( + const path of [ + "simple.txt", + "файл/gemütlich.txt", + "path with spaces.txt", + "yes&no.dat", + "foo(bar)", + "1+1=2", + "~backup.foo", + ] +) { + Deno.test({ + name: `get/put/list with unicode or special characters in URLs: ${path}`, + // only: true, + fn: async () => { + const prefix = `filenames-test-${(Math.random() + 1).toString(36).substring(7)}/`; + const contents = `This is the contents of the file called '${path}'.`; + await client.putObject(prefix + path, contents); + const response = await client.getObject(prefix + path); + assertEquals(await response.text(), contents); + const names = []; + for await (const entry of client.listObjects({ prefix })) { + names.push(entry.key); + } + assertEquals(names, [prefix + path]); + }, + }); +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // presignedGetObject() diff --git a/signing.test.ts b/signing.test.ts index 32ad6ef..461f755 100644 --- a/signing.test.ts +++ b/signing.test.ts @@ -3,6 +3,7 @@ import { bin2hex } from "./helpers.ts"; import { _internalMethods as methods, presignV4, signV4 } from "./signing.ts"; const { + awsUriEncode, getHeadersToSign, getCanonicalRequest, getStringToSign, @@ -277,3 +278,15 @@ Deno.test({ ); }, }); + +Deno.test({ + name: "awsUriEncode", + fn: () => { + assertEquals(awsUriEncode("foo/bar", true), "foo/bar"); + assertEquals(awsUriEncode("foo/bar", false), "foo%2Fbar"); + assertEquals(awsUriEncode("ABC-XYZ-abc-xyz-012-789!"), "ABC-XYZ-abc-xyz-012-789%21"); + assertEquals(awsUriEncode("a.b-c_d~e"), "a.b-c_d~e"); + assertEquals(awsUriEncode("words with spaces"), "words%20with%20spaces"); + assertEquals(awsUriEncode("файл"), "%D1%84%D0%B0%D0%B9%D0%BB"); + }, +}); diff --git a/signing.ts b/signing.ts index 7a8b9aa..cc58695 100644 --- a/signing.ts +++ b/signing.ts @@ -164,6 +164,52 @@ function getHeadersToSign(headers: Headers): string[] { return headersToSign; } +const CODES = { + A: "A".charCodeAt(0), + Z: "Z".charCodeAt(0), + a: "a".charCodeAt(0), + z: "z".charCodeAt(0), + "0": "0".charCodeAt(0), + "9": "9".charCodeAt(0), + "/": "/".charCodeAt(0), +}; +const ALLOWED_BYTES = "-._~".split("").map((s) => s.charCodeAt(0)); + +/** + * Canonical URI encoding for signing, per AWS documentation: + * 1. URI encode every byte except the unreserved characters: + * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'. + * 2. The space character must be encoded as "%20" (and not as "+"). + * 3. Each URI encoded byte is formed by a '%' and the + * two-digit uppercase hexadecimal value of the byte. e.g. "%1A". + * 4. Encode the forward slash character, '/', everywhere except + * in the object key name. For example, if the object key name + * is photos/Jan/sample.jpg, the forward slash in the key name + * is not encoded. + * + * See https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html + * + * @param string the string to encode. + */ +function awsUriEncode(string: string, allowSlashes = false) { + const bytes: Uint8Array = new TextEncoder().encode(string); + let encoded = ""; + for (const byte of bytes) { + if ( + (byte >= CODES.A && byte <= CODES.Z) || + (byte >= CODES.a && byte <= CODES.z) || + (byte >= CODES["0"] && byte <= CODES["9"]) || + (ALLOWED_BYTES.includes(byte)) || + (byte == CODES["/"] && allowSlashes) + ) { + encoded += String.fromCharCode(byte); + } else { + encoded += "%" + byte.toString(16).padStart(2, "0").toUpperCase(); + } + } + return encoded; +} + /** * getCanonicalRequest generate a canonical request of style. * @@ -203,7 +249,7 @@ function getCanonicalRequest( const canonical = []; canonical.push(method.toUpperCase()); - canonical.push(encodeURI(requestResource)); + canonical.push(awsUriEncode(requestResource, true)); canonical.push(requestQuery); canonical.push(headersArray.join("\n") + "\n"); canonical.push(headersToSign.join(";").toLowerCase()); @@ -273,6 +319,7 @@ async function sha256hmac( // Export for testing purposes only export const _internalMethods = { + awsUriEncode, getHeadersToSign, getCanonicalRequest, getStringToSign, diff --git a/xml-parser.ts b/xml-parser.ts index 03a220a..64f3ec8 100644 --- a/xml-parser.ts +++ b/xml-parser.ts @@ -122,7 +122,7 @@ export function parse(xml: string): Document { */ function content() { const m = match(/^([^<]*)/); - if (m) return m[1]; + if (m) return entities(m[1]); return ""; } @@ -132,7 +132,7 @@ export function parse(xml: string): Document { function attribute() { const m = match(/([\w:-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/); if (!m) return; - return { name: m[1], value: strip(m[2]) }; + return { name: m[1], value: entities(strip(m[2])) }; } /** @@ -142,6 +142,11 @@ export function parse(xml: string): Document { return val.replace(/^['"]|['"]$/g, ""); } + /** Basic handling of entities: & < > */ + function entities(val: string) { + return val.replaceAll("<", "<").replaceAll(">", ">").replaceAll("&", "&"); + } + /** * Match `re` and advance the string. */