Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: some special characters in URLs were not being handled correctly #16

Merged
merged 1 commit into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions integration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,34 @@ Deno.test({
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// non-ascii characters in URLs

// Round-trips a single object whose key mixes Cyrillic and accented Latin
// characters, and checks that the body read back equals the body written.
Deno.test({
  name: "getObject()/putObject work with non-ASCII characters in URLs",
  async fn() {
    const key = "файл/gemütlich.txt";
    const body = `This is the contents of the file called '${key}'.`;

    await client.putObject(key, body);

    const fetched = await client.getObject(key);
    const text = await fetched.text();
    assertEquals(text, body);
  },
});
// Registers one test per object key. Each test uploads the object under a
// randomly named prefix, reads it back, and then lists that prefix to check
// that exactly this one key is reported, unchanged.
for (
  const path of [
    "simple.txt",
    "файл/gemütlich.txt",
    "path with spaces.txt",
    "yes&no.dat",
    "foo(bar)",
    "1+1=2",
    "~backup<crazy>.foo",
  ]
) {
  Deno.test({
    name: `get/put/list with unicode or special characters in URLs: ${path}`,
    // only: true, // uncomment to run just these cases while debugging
    async fn() {
      // Random base-36 suffix so each run lists only the object it just created.
      const suffix = (Math.random() + 1).toString(36).substring(7);
      const keyPrefix = `filenames-test-${suffix}/`;
      const fullKey = keyPrefix + path;
      const contents = `This is the contents of the file called '${path}'.`;

      // Write, then read back and compare the body.
      await client.putObject(fullKey, contents);
      const fetched = await client.getObject(fullKey);
      assertEquals(await fetched.text(), contents);

      // Listing the prefix must yield the one key we stored.
      const listedKeys = [];
      for await (const item of client.listObjects({ prefix: keyPrefix })) {
        listedKeys.push(item.key);
      }
      assertEquals(listedKeys, [fullKey]);
    },
  });
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// presignedGetObject()
Expand Down
13 changes: 13 additions & 0 deletions signing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { bin2hex } from "./helpers.ts";
import { _internalMethods as methods, presignV4, signV4 } from "./signing.ts";

const {
awsUriEncode,
getHeadersToSign,
getCanonicalRequest,
getStringToSign,
Expand Down Expand Up @@ -277,3 +278,15 @@ Deno.test({
);
},
});

// Table-driven checks for awsUriEncode: slash handling, unreserved characters,
// spaces, and multi-byte UTF-8 input.
Deno.test({
  name: "awsUriEncode",
  fn: () => {
    // Each row is [input, allowSlashes, expected]. `undefined` for allowSlashes
    // means the argument is omitted, so the function's default is exercised.
    const cases: Array<[string, boolean | undefined, string]> = [
      ["foo/bar", true, "foo/bar"],
      ["foo/bar", false, "foo%2Fbar"],
      ["ABC-XYZ-abc-xyz-012-789!", undefined, "ABC-XYZ-abc-xyz-012-789%21"],
      ["a.b-c_d~e", undefined, "a.b-c_d~e"],
      ["words with spaces", undefined, "words%20with%20spaces"],
      ["файл", undefined, "%D1%84%D0%B0%D0%B9%D0%BB"],
    ];
    for (const [input, allowSlashes, expected] of cases) {
      assertEquals(awsUriEncode(input, allowSlashes), expected);
    }
  },
});
49 changes: 48 additions & 1 deletion signing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,52 @@ function getHeadersToSign(headers: Headers): string[] {
return headersToSign;
}

/**
 * Character codes of the ASCII characters that bound the unreserved ranges
 * ('A'-'Z', 'a'-'z', '0'-'9'), plus the forward slash.
 */
const CODES = (() => {
  const codeOf = (ch: string): number => ch.charCodeAt(0);
  return {
    A: codeOf("A"),
    Z: codeOf("Z"),
    a: codeOf("a"),
    z: codeOf("z"),
    "0": codeOf("0"),
    "9": codeOf("9"),
    "/": codeOf("/"),
  };
})();

/** Character codes of the unreserved punctuation characters: '-', '.', '_' and '~'. */
const ALLOWED_BYTES = Array.from("-._~", (ch) => ch.charCodeAt(0));

/**
 * Percent-encode a string the way AWS requires for the canonical URI used
 * when signing a request. Per the AWS documentation:
 *  1. Every byte is URI encoded except the unreserved characters:
 *     'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'.
 *  2. A space is encoded as "%20" (never as "+").
 *  3. An encoded byte is '%' followed by the two-digit uppercase
 *     hexadecimal value of the byte, e.g. "%1A".
 *  4. The forward slash, '/', is encoded everywhere except in the object
 *     key name. For example, for the key photos/Jan/sample.jpg the slashes
 *     are left as they are.
 *
 * See https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
 *
 * @param string the string to encode; it is converted to UTF-8 and encoded
 *   byte by byte.
 * @param allowSlashes when true, '/' is passed through unencoded (rule 4);
 *   when false (the default) it is encoded as "%2F".
 * @returns the encoded string.
 */
function awsUriEncode(string: string, allowSlashes = false) {
  // True for bytes that must be emitted as-is rather than percent-encoded.
  const passesThrough = (b: number): boolean =>
    (b >= CODES.A && b <= CODES.Z) ||
    (b >= CODES.a && b <= CODES.z) ||
    (b >= CODES["0"] && b <= CODES["9"]) ||
    ALLOWED_BYTES.includes(b) ||
    (allowSlashes && b === CODES["/"]);

  const utf8 = new TextEncoder().encode(string);
  const pieces = Array.from(utf8, (b) =>
    passesThrough(b)
      ? String.fromCharCode(b)
      : "%" + b.toString(16).padStart(2, "0").toUpperCase());
  return pieces.join("");
}

/**
* getCanonicalRequest generates a canonical request of style.
*
Expand Down Expand Up @@ -203,7 +249,7 @@ function getCanonicalRequest(

const canonical = [];
canonical.push(method.toUpperCase());
canonical.push(encodeURI(requestResource));
canonical.push(awsUriEncode(requestResource, true));
canonical.push(requestQuery);
canonical.push(headersArray.join("\n") + "\n");
canonical.push(headersToSign.join(";").toLowerCase());
Expand Down Expand Up @@ -273,6 +319,7 @@ async function sha256hmac(

// Export for testing purposes only
export const _internalMethods = {
awsUriEncode,
getHeadersToSign,
getCanonicalRequest,
getStringToSign,
Expand Down
9 changes: 7 additions & 2 deletions xml-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ export function parse(xml: string): Document {
*/
function content() {
const m = match(/^([^<]*)/);
if (m) return m[1];
if (m) return entities(m[1]);
return "";
}

Expand All @@ -132,7 +132,7 @@ export function parse(xml: string): Document {
function attribute() {
const m = match(/([\w:-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/);
if (!m) return;
return { name: m[1], value: strip(m[2]) };
return { name: m[1], value: entities(strip(m[2])) };
}

/**
Expand All @@ -142,6 +142,11 @@ export function parse(xml: string): Document {
return val.replace(/^['"]|['"]$/g, "");
}

/**
 * Decode XML character and entity references in `val`.
 *
 * Handles the five entities predefined by XML (&lt; &gt; &amp; &quot; &apos;)
 * and numeric character references in decimal (&#NN;) or hexadecimal (&#xHH;)
 * form. Anything else that merely looks like a reference (an unknown entity
 * name, or a number beyond the Unicode range) is left untouched.
 *
 * Decoding happens in a single left-to-right pass, so text produced by one
 * replacement is never decoded a second time: "&amp;lt;" becomes "&lt;",
 * not "<".
 *
 * @param val raw text content or attribute value taken from the XML source.
 * @returns `val` with the recognised references replaced by their characters.
 */
function entities(val: string): string {
  return val.replace(
    /&(lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);/g,
    (whole: string, ref: string): string => {
      switch (ref) {
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "amp":
          return "&";
        case "quot":
          return '"';
        case "apos":
          return "'";
      }
      // Remaining alternatives are numeric: "#x…" is hexadecimal, "#…" is decimal.
      const codePoint = ref.startsWith("#x")
        ? Number.parseInt(ref.slice(2), 16)
        : Number.parseInt(ref.slice(1), 10);
      // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
      if (!Number.isFinite(codePoint) || codePoint > 0x10ffff) return whole;
      return String.fromCodePoint(codePoint);
    },
  );
}

/**
* Match `re` and advance the string.
*/
Expand Down