Skip to content

Commit

Permalink
fix: some special characters in URLs were not being handled correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenmacdonald committed Jun 2, 2023
1 parent 7c7f313 commit 968a846
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 13 deletions.
38 changes: 28 additions & 10 deletions integration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,34 @@ Deno.test({
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// non-ascii characters in URLs

Deno.test({
name: "getObject()/putObject work with non-ASCII characters in URLs",
fn: async () => {
const path = "файл/gemütlich.txt";
const contents = `This is the contents of the file called '${path}'.`;
await client.putObject(path, contents);
const response = await client.getObject(path);
assertEquals(await response.text(), contents);
},
});
for (
const path of [
"simple.txt",
"файл/gemütlich.txt",
"path with spaces.txt",
"yes&no.dat",
"foo(bar)",
"1+1=2",
"~backup<crazy>.foo",
]
) {
Deno.test({
name: `get/put/list with unicode or special characters in URLs: ${path}`,
// only: true,
fn: async () => {
const prefix = `filenames-test-${(Math.random() + 1).toString(36).substring(7)}/`;
const contents = `This is the contents of the file called '${path}'.`;
await client.putObject(prefix + path, contents);
const response = await client.getObject(prefix + path);
assertEquals(await response.text(), contents);
const names = [];
for await (const entry of client.listObjects({ prefix })) {
names.push(entry.key);
}
assertEquals(names, [prefix + path]);
},
});
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// presignedGetObject()
Expand Down
13 changes: 13 additions & 0 deletions signing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { bin2hex } from "./helpers.ts";
import { _internalMethods as methods, presignV4, signV4 } from "./signing.ts";

const {
awsUriEncode,
getHeadersToSign,
getCanonicalRequest,
getStringToSign,
Expand Down Expand Up @@ -277,3 +278,15 @@ Deno.test({
);
},
});

Deno.test({
name: "awsUriEncode",
fn: () => {
assertEquals(awsUriEncode("foo/bar", true), "foo/bar");
assertEquals(awsUriEncode("foo/bar", false), "foo%2Fbar");
assertEquals(awsUriEncode("ABC-XYZ-abc-xyz-012-789!"), "ABC-XYZ-abc-xyz-012-789%21");
assertEquals(awsUriEncode("a.b-c_d~e"), "a.b-c_d~e");
assertEquals(awsUriEncode("words with spaces"), "words%20with%20spaces");
assertEquals(awsUriEncode("файл"), "%D1%84%D0%B0%D0%B9%D0%BB");
},
});
49 changes: 48 additions & 1 deletion signing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,52 @@ function getHeadersToSign(headers: Headers): string[] {
return headersToSign;
}

const CODES = {
A: "A".charCodeAt(0),
Z: "Z".charCodeAt(0),
a: "a".charCodeAt(0),
z: "z".charCodeAt(0),
"0": "0".charCodeAt(0),
"9": "9".charCodeAt(0),
"/": "/".charCodeAt(0),
};
const ALLOWED_BYTES = "-._~".split("").map((s) => s.charCodeAt(0));

/**
* Canonical URI encoding for signing, per AWS documentation:
* 1. URI encode every byte except the unreserved characters:
* 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'.
* 2. The space character must be encoded as "%20" (and not as "+").
* 3. Each URI encoded byte is formed by a '%' and the
* two-digit uppercase hexadecimal value of the byte. e.g. "%1A".
* 4. Encode the forward slash character, '/', everywhere except
* in the object key name. For example, if the object key name
* is photos/Jan/sample.jpg, the forward slash in the key name
* is not encoded.
*
* See https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
*
* @param string the string to encode.
*/
function awsUriEncode(string: string, allowSlashes = false) {
const bytes: Uint8Array = new TextEncoder().encode(string);
let encoded = "";
for (const byte of bytes) {
if (
(byte >= CODES.A && byte <= CODES.Z) ||
(byte >= CODES.a && byte <= CODES.z) ||
(byte >= CODES["0"] && byte <= CODES["9"]) ||
(ALLOWED_BYTES.includes(byte)) ||
(byte == CODES["/"] && allowSlashes)
) {
encoded += String.fromCharCode(byte);
} else {
encoded += "%" + byte.toString(16).padStart(2, "0").toUpperCase();
}
}
return encoded;
}

/**
* getCanonicalRequest generate a canonical request of style.
*
Expand Down Expand Up @@ -203,7 +249,7 @@ function getCanonicalRequest(

const canonical = [];
canonical.push(method.toUpperCase());
canonical.push(encodeURI(requestResource));
canonical.push(awsUriEncode(requestResource, true));
canonical.push(requestQuery);
canonical.push(headersArray.join("\n") + "\n");
canonical.push(headersToSign.join(";").toLowerCase());
Expand Down Expand Up @@ -273,6 +319,7 @@ async function sha256hmac(

// Export for testing purposes only
export const _internalMethods = {
awsUriEncode,
getHeadersToSign,
getCanonicalRequest,
getStringToSign,
Expand Down
9 changes: 7 additions & 2 deletions xml-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ export function parse(xml: string): Document {
*/
function content() {
const m = match(/^([^<]*)/);
if (m) return m[1];
if (m) return entities(m[1]);
return "";
}

Expand All @@ -132,7 +132,7 @@ export function parse(xml: string): Document {
function attribute() {
const m = match(/([\w:-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/);
if (!m) return;
return { name: m[1], value: strip(m[2]) };
return { name: m[1], value: entities(strip(m[2])) };
}

/**
Expand All @@ -142,6 +142,11 @@ export function parse(xml: string): Document {
return val.replace(/^['"]|['"]$/g, "");
}

/** Basic handling of entities: &amp; &lt; &gt; */
function entities(val: string) {
return val.replaceAll("&lt;", "<").replaceAll("&gt;", ">").replaceAll("&amp;", "&");
}

/**
* Match `re` and advance the string.
*/
Expand Down

0 comments on commit 968a846

Please sign in to comment.