Skip to content

Commit

Permalink
escape more characters in fileURLWithFileSystemPath (#81)
Browse files Browse the repository at this point in the history
* escape more characters in fileURLWithFileSystemPath

* use a lookup table
  • Loading branch information
paperclover authored Jan 28, 2025
1 parent e6cb36c commit a4bed6d
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions Source/WTF/wtf/URL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,10 +814,24 @@ void URL::setQuery(StringView newQuery)
maybeTrimTrailingSpacesFromOpaquePath();
}

// To match Node.js pathToFileURL, the following chars are escaped: \0, \t, \n, \r, space, " # % ? [ \ ] ^ | ~
// https://github.com/nodejs/node/blob/532fff6b27be6b0d833d06b4a9fe46d6fb7f0f6c/src/node_url.cc#L82-L121
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
// Returns the single-bit mask that marks an ASCII code point inside its 64-bit
// word of escapeTable. Only the low six bits of the code point select the bit;
// which word it lands in is decided by the position in the initializer below.
static constexpr uint64_t escapeTableBit(unsigned char codePoint)
{
    return uint64_t { 1 } << (codePoint & 63);
}

// Bitmap of the ASCII code points that must be percent-encoded in a file path.
// Word 0 describes code points 0-63 and word 1 describes code points 64-127:
// code point c needs escaping when bit (c & 63) of escapeTable[c >> 6] is set.
static constexpr uint64_t escapeTable[] = {
    // Code points 0-63: NUL, tab, line feed, carriage return, space, " # % ?
    escapeTableBit('\0') | escapeTableBit('\t') | escapeTableBit('\n') | escapeTableBit('\r')
        | escapeTableBit(' ') | escapeTableBit('"') | escapeTableBit('#') | escapeTableBit('%')
        | escapeTableBit('?'),

    // Code points 64-127: [ \ ] ^ | ~
    escapeTableBit('[') | escapeTableBit('\\') | escapeTableBit(']')
        | escapeTableBit('^') | escapeTableBit('|') | escapeTableBit('~')
};

// Percent-encodes the characters of a file system path that must not appear
// literally in a file URL: every non-ASCII code unit, plus the ASCII characters
// flagged in escapeTable.
// NOTE(review): the name suggests the input is returned without a copy when
// nothing needs escaping; that depends on percentEncodeCharacters — confirm.
static String escapePathWithoutCopying(StringView path)
{
    auto shouldEscape = [](UChar character) {
        // Non-ASCII is always escaped. The range check must come first so the
        // table index (character >> 6) can only ever be 0 or 1.
        return character >= 128 || ((escapeTable[character >> 6] >> (character & 63)) & 1);
    };
    return percentEncodeCharacters(path, shouldEscape);
}
Expand Down

0 comments on commit a4bed6d

Please sign in to comment.