-
Notifications
You must be signed in to change notification settings - Fork 29.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refs: web-platform-tests/wpt#26385 PR-URL: #36659 Reviewed-By: Michaël Zasso <[email protected]> Reviewed-By: Rich Trott <[email protected]>
- Loading branch information
1 parent
986d5ac
commit 4acc273
Showing
7 changed files
with
374 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
test/fixtures/wpt/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
<!doctype html> | ||
<script src=/resources/testharness.js></script> | ||
<script src=/resources/testharnessreport.js></script> | ||
<script src=resources/ranges.js></script> | ||
<script> | ||
// Registers one testharness.js test that decodes `input` under both the
// "gb18030" and "gbk" labels and expects the same `output` from each.
//   input  - array of byte values to decode
//   output - expected decoded string
//   desc   - suffix appended to the "gb18030 decoder: " test name
const decode = (input, output, desc) => {
  test(function() {
    // `const` keeps the loop variable local; a bare `encoding` would create an
    // implicit global and throw a ReferenceError in strict/module code.
    for (const encoding of ["gb18030", "gbk"]) {
      assert_equals(new TextDecoder(encoding).decode(new Uint8Array(input)), output);
    }
  }, "gb18030 decoder: " + desc);
};
|
||
// Fixed decoder expectations: decode(bytes, expected string, description).

// Single bytes and two-byte sequences, including invalid trail bytes.
decode([115], "s", "ASCII");
decode([0x80], "\u20AC", "euro");
decode([0xFF], "\uFFFD", "initial byte out of accepted ranges");
decode([0x81], "\uFFFD", "end of queue, gb18030 first not 0");
decode([0x81, 0x28], "\ufffd(", "two bytes 0x81 0x28");
decode([0x81, 0x40], "\u4E02", "two bytes 0x81 0x40");
decode([0x81, 0x7E], "\u4E8A", "two bytes 0x81 0x7e");
decode([0x81, 0x7F], "\ufffd\u007f", "two bytes 0x81 0x7f");
decode([0x81, 0x80], "\u4E90", "two bytes 0x81 0x80");
decode([0x81, 0xFE], "\u4FA2", "two bytes 0x81 0xFE");
decode([0x81, 0xFF], "\ufffd", "two bytes 0x81 0xFF");
decode([0xFE, 0x40], "\uFA0C", "two bytes 0xFE 0x40");
decode([0xFE, 0xFE], "\uE4C5", "two bytes 0xFE 0xFE");
decode([0xFE, 0xFF], "\ufffd", "two bytes 0xFE 0xFF");

// Truncated or malformed three- and four-byte sequences.
decode([0x81, 0x30], "\ufffd", "two bytes 0x81 0x30");
decode([0x81, 0x30, 0xFE], "\ufffd", "three bytes 0x81 0x30 0xFE");
decode([0x81, 0x30, 0xFF], "\ufffd0\ufffd", "three bytes 0x81 0x30 0xFF");
decode([0x81, 0x30, 0xFE, 0x29], "\ufffd0\ufffd)", "four bytes 0x81 0x30 0xFE 0x29");
decode([0xFE, 0x39, 0xFE, 0x39], "\ufffd", "four bytes 0xFE 0x39 0xFE 0x39");

// Four-byte sequences named by pointer, where
//   pointer = (b1 - 0x81) * 12600 + (b2 - 0x30) * 1260 + (b3 - 0x81) * 10 + (b4 - 0x30).
// The labels for 7456, 7458 and 188999 were corrected to match their bytes.
decode([0x81, 0x35, 0xF4, 0x36], "\u1E3E", "pointer 7456");
decode([0x81, 0x35, 0xF4, 0x37], "\ue7c7", "pointer 7457");
decode([0x81, 0x35, 0xF4, 0x38], "\u1E40", "pointer 7458");
decode([0x84, 0x31, 0xA4, 0x39], "\uffff", "pointer 39419");
decode([0x84, 0x31, 0xA5, 0x30], "\ufffd", "pointer 39420");
decode([0x8F, 0x39, 0xFE, 0x39], "\ufffd", "pointer 188999");
decode([0x90, 0x30, 0x81, 0x30], "\u{10000}", "pointer 189000");
decode([0xE3, 0x32, 0x9A, 0x35], "\u{10FFFF}", "pointer 1237575");
decode([0xE3, 0x32, 0x9A, 0x36], "\ufffd", "pointer 1237576");

// Special cases.
decode([0x83, 0x36, 0xC8, 0x30], "\uE7C8", "legacy ICU special case 1");
decode([0xA1, 0xAD], "\u2026", "legacy ICU special case 2");
decode([0xA1, 0xAB], "\uFF5E", "legacy ICU special case 3");
|
||
// For every [pointer, expected string] entry in `ranges`, split the pointer
// into its four-byte form and register a decoder test named "range <index>".
let i = 0;
ranges.forEach(([pointer, expected]) => {
  const byte1 = Math.floor(pointer / 12600) + 0x81;
  const byte2 = Math.floor((pointer % 12600) / 1260) + 0x30;
  const byte3 = Math.floor((pointer % 1260) / 10) + 0x81;
  const byte4 = pointer % 10 + 0x30;
  decode([byte1, byte2, byte3, byte4], expected, "range " + i++);
});
</script> |
48 changes: 48 additions & 0 deletions
48
test/fixtures/wpt/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
<!doctype html> | ||
<meta charset=gb18030> | ||
<script src=/resources/testharness.js></script> | ||
<script src=/resources/testharnessreport.js></script> | ||
<script src=resources/ranges.js></script> | ||
<script> | ||
// Registers one testharness.js test that places `input` in the query of an
// <a> element's URL and compares the serialized query with `output`.
//   input  - string appended after "?" in the URL
//   output - expected value of a.search without the leading "?"
//   desc   - suffix appended to the "gb18030 encoder: " test name
const encode = (input, output, desc) => {
  test(function() {
    const a = document.createElement("a"); // <a> uses document encoding for URL's query
    a.href = "https://example.com/?" + input;
    // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
    assert_equals(a.search.slice(1), output);
  }, "gb18030 encoder: " + desc);
};
|
||
// Fixed gb18030 encoder expectations, run in order.
// Each row is [input, expected percent-encoded query, test description].
for (const [input, output, desc] of [
  ["s", "s", "very basic"],
  ["\u20AC", "%A2%E3", "Euro"],
  ["\u4E02", "%81@", "character"],
  ["\uE4C6", "%A1@", "PUA"],
  ["\uE4C5", "%FE%FE", "PUA #2"],
  ["\uE5E5", "%26%2358853%3B", "PUA #3"],
  ["\ud83d\udca9", "%949%DA3", "poo"],
  ["\uE7C7", "%815%F47", "Ranges pointer special case"],
  ["\uE7C8", "%836%C80", "legacy ICU special case 1"],
  ["\u2026", "%A1%AD", "legacy ICU special case 2"],
  ["\uFF5E", "%A1%AB", "legacy ICU special case 3"],
]) {
  encode(input, output, desc);
}
|
||
// Returns the integer part of `x` (rounded down) as an uppercase hexadecimal string.
const upperCaseNibble = x => Math.floor(x).toString(16).toUpperCase();
|
||
// Builds the expected query string for a four-byte pointer: the first and
// third bytes are written as "%XX" (uppercase hex), while the second and
// fourth are written as the literal characters with codes 0x30 + digit.
const encodePointer = pointer => {
  const percentHex = byte => "%" + upperCaseNibble(byte / 16) + upperCaseNibble(byte % 16);
  const literalChar = digit => String.fromCharCode(digit + 0x30);

  const firstByte = Math.floor(pointer / 12600) + 0x81;
  const secondDigit = Math.floor((pointer % 12600) / 1260);
  const thirdByte = Math.floor((pointer % 1260) / 10) + 0x81;
  const fourthDigit = pointer % 10;

  return [
    percentHex(firstByte),
    literalChar(secondDigit),
    percentHex(thirdByte),
    literalChar(fourthDigit),
  ].join("");
};
|
||
// For every [pointer, character] entry in `ranges`, register an encoder test
// named "range <index>" that expects the character to serialize to the
// four-byte form of its pointer.
let i = 0;
ranges.forEach(([pointer, character]) => {
  encode(character, encodePointer(pointer), "range " + i++);
});
</script> |
210 changes: 210 additions & 0 deletions
210
test/fixtures/wpt/encoding/legacy-mb-schinese/gb18030/resources/ranges.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
// Based on https://encoding.spec.whatwg.org/index-gb18030-ranges.txt | ||
// Each entry is a two-element array: [pointer, one-code-point string], listed | ||
// in ascending pointer order. The gb18030 decoder and encoder tests turn the | ||
// pointer into a four-byte sequence and pair it with the string. | ||
// NOTE(review): presumably each string is the first code point of the range | ||
// that starts at that pointer in the index above - confirm against the index. | ||
const ranges = [ | ||
[0, "\u0080"], | ||
[36, "\u00A5"], | ||
[38, "\u00A9"], | ||
[45, "\u00B2"], | ||
[50, "\u00B8"], | ||
[81, "\u00D8"], | ||
[89, "\u00E2"], | ||
[95, "\u00EB"], | ||
[96, "\u00EE"], | ||
[100, "\u00F4"], | ||
[103, "\u00F8"], | ||
[104, "\u00FB"], | ||
[105, "\u00FD"], | ||
[109, "\u0102"], | ||
[126, "\u0114"], | ||
[133, "\u011C"], | ||
[148, "\u012C"], | ||
[172, "\u0145"], | ||
[175, "\u0149"], | ||
[179, "\u014E"], | ||
[208, "\u016C"], | ||
[306, "\u01CF"], | ||
[307, "\u01D1"], | ||
[308, "\u01D3"], | ||
[309, "\u01D5"], | ||
[310, "\u01D7"], | ||
[311, "\u01D9"], | ||
[312, "\u01DB"], | ||
[313, "\u01DD"], | ||
[341, "\u01FA"], | ||
[428, "\u0252"], | ||
[443, "\u0262"], | ||
[544, "\u02C8"], | ||
[545, "\u02CC"], | ||
[558, "\u02DA"], | ||
[741, "\u03A2"], | ||
[742, "\u03AA"], | ||
[749, "\u03C2"], | ||
[750, "\u03CA"], | ||
[805, "\u0402"], | ||
[819, "\u0450"], | ||
[820, "\u0452"], | ||
[7922, "\u2011"], | ||
[7924, "\u2017"], | ||
[7925, "\u201A"], | ||
[7927, "\u201E"], | ||
[7934, "\u2027"], | ||
[7943, "\u2031"], | ||
[7944, "\u2034"], | ||
[7945, "\u2036"], | ||
[7950, "\u203C"], | ||
[8062, "\u20AD"], | ||
[8148, "\u2104"], | ||
[8149, "\u2106"], | ||
[8152, "\u210A"], | ||
[8164, "\u2117"], | ||
[8174, "\u2122"], | ||
[8236, "\u216C"], | ||
[8240, "\u217A"], | ||
[8262, "\u2194"], | ||
[8264, "\u219A"], | ||
[8374, "\u2209"], | ||
[8380, "\u2210"], | ||
[8381, "\u2212"], | ||
[8384, "\u2216"], | ||
[8388, "\u221B"], | ||
[8390, "\u2221"], | ||
[8392, "\u2224"], | ||
[8393, "\u2226"], | ||
[8394, "\u222C"], | ||
[8396, "\u222F"], | ||
[8401, "\u2238"], | ||
[8406, "\u223E"], | ||
[8416, "\u2249"], | ||
[8419, "\u224D"], | ||
[8424, "\u2253"], | ||
[8437, "\u2262"], | ||
[8439, "\u2268"], | ||
[8445, "\u2270"], | ||
[8482, "\u2296"], | ||
[8485, "\u229A"], | ||
[8496, "\u22A6"], | ||
[8521, "\u22C0"], | ||
[8603, "\u2313"], | ||
[8936, "\u246A"], | ||
[8946, "\u249C"], | ||
[9046, "\u254C"], | ||
[9050, "\u2574"], | ||
[9063, "\u2590"], | ||
[9066, "\u2596"], | ||
[9076, "\u25A2"], | ||
[9092, "\u25B4"], | ||
[9100, "\u25BE"], | ||
[9108, "\u25C8"], | ||
[9111, "\u25CC"], | ||
[9113, "\u25D0"], | ||
[9131, "\u25E6"], | ||
[9162, "\u2607"], | ||
[9164, "\u260A"], | ||
[9218, "\u2641"], | ||
[9219, "\u2643"], | ||
[11329, "\u2E82"], | ||
[11331, "\u2E85"], | ||
[11334, "\u2E89"], | ||
[11336, "\u2E8D"], | ||
[11346, "\u2E98"], | ||
[11361, "\u2EA8"], | ||
[11363, "\u2EAB"], | ||
[11366, "\u2EAF"], | ||
[11370, "\u2EB4"], | ||
[11372, "\u2EB8"], | ||
[11375, "\u2EBC"], | ||
[11389, "\u2ECB"], | ||
[11682, "\u2FFC"], | ||
[11686, "\u3004"], | ||
[11687, "\u3018"], | ||
[11692, "\u301F"], | ||
[11694, "\u302A"], | ||
[11714, "\u303F"], | ||
[11716, "\u3094"], | ||
[11723, "\u309F"], | ||
[11725, "\u30F7"], | ||
[11730, "\u30FF"], | ||
[11736, "\u312A"], | ||
[11982, "\u322A"], | ||
[11989, "\u3232"], | ||
[12102, "\u32A4"], | ||
[12336, "\u3390"], | ||
[12348, "\u339F"], | ||
[12350, "\u33A2"], | ||
[12384, "\u33C5"], | ||
[12393, "\u33CF"], | ||
[12395, "\u33D3"], | ||
[12397, "\u33D6"], | ||
[12510, "\u3448"], | ||
[12553, "\u3474"], | ||
[12851, "\u359F"], | ||
[12962, "\u360F"], | ||
[12973, "\u361B"], | ||
[13738, "\u3919"], | ||
[13823, "\u396F"], | ||
[13919, "\u39D1"], | ||
[13933, "\u39E0"], | ||
[14080, "\u3A74"], | ||
[14298, "\u3B4F"], | ||
[14585, "\u3C6F"], | ||
[14698, "\u3CE1"], | ||
[15583, "\u4057"], | ||
[15847, "\u4160"], | ||
[16318, "\u4338"], | ||
[16434, "\u43AD"], | ||
[16438, "\u43B2"], | ||
[16481, "\u43DE"], | ||
[16729, "\u44D7"], | ||
[17102, "\u464D"], | ||
[17122, "\u4662"], | ||
[17315, "\u4724"], | ||
[17320, "\u472A"], | ||
[17402, "\u477D"], | ||
[17418, "\u478E"], | ||
[17859, "\u4948"], | ||
[17909, "\u497B"], | ||
[17911, "\u497E"], | ||
[17915, "\u4984"], | ||
[17916, "\u4987"], | ||
[17936, "\u499C"], | ||
[17939, "\u49A0"], | ||
[17961, "\u49B8"], | ||
[18664, "\u4C78"], | ||
[18703, "\u4CA4"], | ||
[18814, "\u4D1A"], | ||
[18962, "\u4DAF"], | ||
[19043, "\u9FA6"], | ||
[33469, "\uE76C"], | ||
[33470, "\uE7C8"], | ||
[33471, "\uE7E7"], | ||
[33484, "\uE815"], | ||
[33485, "\uE819"], | ||
[33490, "\uE81F"], | ||
[33497, "\uE827"], | ||
[33501, "\uE82D"], | ||
[33505, "\uE833"], | ||
[33513, "\uE83C"], | ||
[33520, "\uE844"], | ||
[33536, "\uE856"], | ||
[33550, "\uE865"], | ||
[37845, "\uF92D"], | ||
[37921, "\uF97A"], | ||
[37948, "\uF996"], | ||
[38029, "\uF9E8"], | ||
[38038, "\uF9F2"], | ||
[38064, "\uFA10"], | ||
[38065, "\uFA12"], | ||
[38066, "\uFA15"], | ||
[38069, "\uFA19"], | ||
[38075, "\uFA22"], | ||
[38076, "\uFA25"], | ||
[38078, "\uFA2A"], | ||
[39108, "\uFE32"], | ||
[39109, "\uFE45"], | ||
[39113, "\uFE53"], | ||
[39114, "\uFE58"], | ||
[39115, "\uFE67"], | ||
[39116, "\uFE6C"], | ||
[39265, "\uFF5F"], | ||
[39394, "\uFFE6"], | ||
[189000, "\u{10000}"] | ||
]; |
33 changes: 33 additions & 0 deletions
33
test/fixtures/wpt/encoding/legacy-mb-schinese/gbk/gbk-decoder.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<!doctype html> | ||
<script src=/resources/testharness.js></script> | ||
<script src=/resources/testharnessreport.js></script> | ||
<script> | ||
// Two-byte pointers to test. gbkPointers[i] is expected to decode to the code
// point at codePoints[i]; both tables have 82 entries in the same order.
const gbkPointers = [
  6432, 7533, 7536, 7672, 7673, 7674, 7675, 7676, 7677, 7678, 7679, 7680, 7681, 7682, 7683, 7684,
  23766, 23770, 23771, 23772, 23773, 23774, 23776, 23777, 23778, 23779, 23780, 23781, 23782, 23784, 23785, 23786,
  23787, 23790, 23791, 23792, 23793, 23796, 23797, 23798, 23799, 23800, 23801, 23802, 23803, 23805, 23806, 23807,
  23808, 23809, 23810, 23811, 23813, 23814, 23815, 23816, 23817, 23818, 23819, 23820, 23821, 23822, 23823, 23824,
  23825, 23826, 23827, 23828, 23831, 23832, 23833, 23834, 23835, 23836, 23837, 23838, 23839, 23840, 23841, 23842,
  23843, 23844
];
const codePoints = [
  0x20ac, 0x1e3f, 0x01f9, 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
  0x2e81, 0x2e84, 0x3473, 0x3447, 0x2e88, 0x2e8b, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e, 0x3918, 0x39cf, 0x39df, 0x3a73,
  0x39d0, 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x2eaa, 0x4056, 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x43b1, 0x43ac, 0x2ebb,
  0x43dd, 0x44d6, 0x4661, 0x464c, 0x4723, 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982, 0x4983, 0x4985, 0x4986,
  0x499f, 0x499b, 0x49b7, 0x49b6, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13, 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18,
  0x4d19, 0x4dae
];

// Register one test per pointer: build its lead/trail byte pair, decode the
// pair with the "GBK" label and compare the first UTF-16 code unit of the
// result with the expected code point.
for (let i = 0; i < gbkPointers.length; i++) {
  const pointer = gbkPointers[i];
  test(function() {
    // Floor explicitly so the lead byte is an integer, rather than relying on
    // Uint8Array silently truncating a fractional value.
    const lead = Math.floor(pointer / 190) + 0x81;
    const trail = pointer % 190;
    // Trail values below 0x3F map to 0x40..0x7E; the rest start at 0x80,
    // so byte 0x7F is never produced.
    const offset = trail < 0x3F ? 0x40 : 0x41;
    const encoded = [lead, trail + offset];
    const decoded = new TextDecoder("GBK").decode(new Uint8Array(encoded)).charCodeAt(0);
    assert_equals(decoded, codePoints[i]);
  }, "gbk pointer: " + pointer);
}
</script> |
26 changes: 26 additions & 0 deletions
26
test/fixtures/wpt/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
<!doctype html> | ||
<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite --> | ||
<script src=/resources/testharness.js></script> | ||
<script src=/resources/testharnessreport.js></script> | ||
<script> | ||
// Registers one testharness.js test that places `input` in the query of an
// <a> element's URL and compares the serialized query with `output`.
//   input  - string appended after "?" in the URL
//   output - expected value of a.search without the leading "?"
//   desc   - suffix appended to the "gbk encoder: " test name
function encode(input, output, desc) {
  test(function() {
    const a = document.createElement("a"); // <a> uses document encoding for URL's query
    a.href = "https://example.com/?" + input;
    // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
    assert_equals(a.search.slice(1), output);
  }, "gbk encoder: " + desc);
}
|
||
// Fixed gbk encoder expectations, run in order.
// Each row is [input, expected percent-encoded query, test description].
for (const [input, output, desc] of [
  ["s", "s", "very basic"],
  ["\u20AC", "%80", "Euro"],
  ["\u4E02", "%81@", "character"],
  ["\uE4C6", "%A1@", "PUA"],
  ["\uE4C5", "%FE%FE", "PUA #2"],
  ["\ud83d\udca9", "%26%23128169%3B", "poo"],
  ["\uE7C8", "%26%2359336%3B", "legacy ICU special case 1"],
  ["\u2026", "%A1%AD", "legacy ICU special case 2"],
  ["\uFF5E", "%A1%AB", "legacy ICU special case 3"],
  ["\u00A5", "%26%23165%3B", "legacy WebKit case 1"],
  ["\u22EF", "%26%238943%3B", "legacy WebKit case 2"],
  ["\u301C", "%26%2312316%3B", "legacy WebKit case 3"],
]) {
  encode(input, output, desc);
}
</script> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters