Skip to content

Commit

Permalink
use a lookup table
Browse files Browse the repository at this point in the history
  • Loading branch information
paperclover committed Jan 28, 2025
1 parent edde269 commit cdd2242
Showing 1 changed file with 15 additions and 16 deletions.
31 changes: 15 additions & 16 deletions Source/WTF/wtf/URL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,25 +814,24 @@ void URL::setQuery(StringView newQuery)
maybeTrimTrailingSpacesFromOpaquePath();
}

// To match Node.js pathToFileURL, the following ASCII chars are escaped: \0, \t, \n, \r, space, " # % ? [ \ ] ^ | ~
// https://github.com/nodejs/node/blob/532fff6b27be6b0d833d06b4a9fe46d6fb7f0f6c/src/node_url.cc#L82-L121
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
// Returns the single-bit mask for an ASCII character inside its 64-bit word
// of escapeTable (the low six bits of the code point select the bit).
static constexpr uint64_t escapeTableBit(unsigned char character)
{
    return 1ULL << (character & 63);
}

// 128-entry bitmap over the ASCII range, split into two 64-bit words.
// Word index is (codePoint >> 6); bit index is (codePoint & 63).
// A set bit means the character must be percent-escaped.
static constexpr uint64_t escapeTable[] = {
    // Word 0, code points 0-63: NUL, tab, LF, CR, space, double quote, '#', '%', '?'
    escapeTableBit('\0') | escapeTableBit('\t') | escapeTableBit('\n') | escapeTableBit('\r')
        | escapeTableBit(' ') | escapeTableBit('"') | escapeTableBit('#') | escapeTableBit('%')
        | escapeTableBit('?'),

    // Word 1, code points 64-127: '[', backslash, ']', '^', '|', '~'
    escapeTableBit('[') | escapeTableBit('\\') | escapeTableBit(']')
        | escapeTableBit('^') | escapeTableBit('|') | escapeTableBit('~'),
};

// Hands `path` to percentEncodeCharacters together with a predicate that
// selects every character to be escaped:
//   - any non-ASCII code unit, and
//   - any ASCII character whose bit is set in escapeTable
//     (\0, \t, \n, \r, space, " # % ? [ \ ] ^ | ~).
// Returns whatever percentEncodeCharacters produces for that predicate.
static String escapePathWithoutCopying(StringView path)
{
    auto shouldEscape = [](UChar character) -> bool {
        // The non-ASCII check must come first: it keeps the table index
        // (character >> 6) within the two words that escapeTable provides.
        if (!isASCII(character))
            return true;
        return (escapeTable[character >> 6] >> (character & 63)) & 1;
    };
    return percentEncodeCharacters(path, shouldEscape);
}
Expand Down

0 comments on commit cdd2242

Please sign in to comment.