Skip to content

Commit

Permalink
Encoding/HTML: cross-document encoding inheritance
Browse files Browse the repository at this point in the history
Make Encoding tests no longer rely on an interoperability issue and simultaneously add tests for that issue. The latter tests align with the HTML Standard, but that might not necessarily be the path we want to take.

whatwg/html#8789 can be used for further discussion.
  • Loading branch information
annevk authored Apr 4, 2023
1 parent cbd294b commit a58bbf6
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 4 deletions.
3 changes: 3 additions & 0 deletions encoding/resources/text-html-meta-charset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def main(request, response):
    """Serve an HTML page whose <meta charset> is the "label" query value.

    The value of the ``label`` query parameter is inserted verbatim between
    the quotes of ``<meta charset="...">``, followed by the fixed text
    ``hello encoding``. The response is labelled ``text/html`` with no
    charset parameter on the Content-Type header.
    """
    response.headers.set(b"Content-Type", b"text/html")
    # Read the label only after the header is set, as the handler always did.
    label = request.GET.first(b"label")
    prefix = b"<meta charset=\""
    suffix = b"\">hello encoding"
    response.content = prefix + label + suffix
10 changes: 6 additions & 4 deletions encoding/unsupported-labels.window.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,13 @@
t.add_cleanup(() => {
frame.remove();
});
frame.src = "resources/text-plain-charset.py?label=" + label;
// Intentionally use <meta> as Content-Type results in browser differences
// See /html/syntax/charset/inheritance-bogus-meta.html
frame.src = "resources/text-html-meta-charset.py?label=" + label;
frame.onload = t.step_func_done(() => {
// If we ever change this default this needs adjusting accordingly.
assert_equals(frame.contentDocument.characterSet, "windows-1252");
assert_equals(frame.contentDocument.inputEncoding, "windows-1252");
// UTF-8 as it inherits from the parent document when unrecognized
assert_equals(frame.contentDocument.characterSet, "UTF-8");
assert_equals(frame.contentDocument.inputEncoding, "UTF-8");
});
document.body.append(frame);
}, `${label} is not supported by the Encoding Standard`);
Expand Down
35 changes: 35 additions & 0 deletions html/syntax/charset/inheritance-bogus-meta-utf-8.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<!doctype html>
<meta charset="utf-8" />
<title>Inheriting from UTF-8 parent</title>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Load each child resource in a same-origin iframe beneath this UTF-8 parent
// and check the text that ends up in the child's <body>.
for (const scenario of [
  {
    title: "Child with bogus <meta charset>",
    url: "resources/bogus-charset.html",
    // The child's payload is the byte 0xA2 (¢ in windows-1252); the test
    // expects it to surface as U+FFFD.
    expected: "\uFFFD\n"
  },
  {
    title: "Child with bogus Content-Type charset",
    url: "resources/bogus-charset-http.py",
    expected: "\uFFFD\n"
  },
  {
    title: "Child with bogus Content-Type charset, but valid <meta charset>",
    url: "resources/bogus-charset-http-valid-meta.py",
    expected: "\u045E\n"
  }
]) {
  async_test(test => {
    const child = document.createElement("iframe");
    test.add_cleanup(() => {
      child.remove();
    });
    child.src = scenario.url;
    // The handler is attached before the iframe is inserted into the document.
    child.onload = test.step_func_done(() => {
      assert_equals(child.contentDocument.body.textContent, scenario.expected);
    });
    document.body.append(child);
  }, scenario.title);
}
</script>
46 changes: 46 additions & 0 deletions html/syntax/charset/inheritance-bogus-meta.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<!doctype html>
<meta charset=windows-1253>
<title>Inheriting from windows-1253 parent</title>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script src=/common/get-host-info.sub.js></script>
<div id=log></div>
<script>
// Same-origin children of this windows-1253 parent: each entry's `url` is
// loaded in an iframe, and once it has loaded the text content of the child's
// <body> is compared with `expected`.
// NOTE(review): the first two `expected` values are written as a literal
// character, not an escape. The inline comment below suggests the literal
// is the raw byte 0xA2, which reads as U+0386 when this file is decoded as
// windows-1253 -- confirm the file's on-disk encoding before editing or
// re-saving it.
[
{
"title": "Child with bogus <meta charset>",
"url": "resources/bogus-charset.html",
"expected": "¢\n" // 0x00A2 in windows-1252 is at the same position as 0x0386 in windows-1253
},
{
"title": "Child with bogus Content-Type charset",
"url": "resources/bogus-charset-http.py",
"expected": "¢\n"
},
{
"title": "Child with bogus Content-Type charset, but valid <meta charset>",
"url": "resources/bogus-charset-http-valid-meta.py",
"expected": "\u045E\n"
}
].forEach(({ title, url, expected }) => {
// One async test per entry; the test title is the entry's `title`.
async_test(t => {
const frame = document.createElement("iframe");
// Remove the iframe again when the test finishes.
t.add_cleanup(() => frame.remove());
frame.src = url;
// The test completes when the assertion inside the load handler has run.
frame.onload = t.step_func_done(() => {
assert_equals(frame.contentDocument.body.textContent, expected);
});
document.body.append(frame);
}, title);
});

// Cross-origin variant: the parent cannot read the child's document here, so
// the child reports its body text back with postMessage (requested through
// the "?postMessage" query string).
async_test(test => {
  self.onmessage = test.step_func_done(event => {
    assert_equals(event.data, "\u00A2\n");
  });
  const child = document.createElement("iframe");
  test.add_cleanup(() => {
    child.remove();
  });
  // Same resource path as the same-origin case, served from the remote origin.
  const remoteOrigin = get_host_info().HTTP_REMOTE_ORIGIN;
  const resourcePath = new URL("resources/bogus-charset.html", location).pathname;
  child.src = remoteOrigin + resourcePath + "?postMessage";
  document.body.append(child);
}, "Cross-origin child with bogus <meta charset>");
</script>
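4 changes: 4 additions & 0 deletions html/syntax/charset/resources/bogus-charset-http-valid-meta.py
Original file line number Diff line number Diff line change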
@@ -0,0 +1,4 @@
def main(request, response):
    """Serve HTML with an unrecognized HTTP charset and a windows-1251 <meta>.

    The Content-Type header carries ``charset=this-is-not-a-charset``, while
    the document itself declares ``<meta charset=windows-1251>``. The payload
    after the <meta> is the single byte 0xA2 followed by a newline.
    """
    content_type = b"text/html;charset=this-is-not-a-charset"
    response.headers.set(b"Content-Type", content_type)
    # 0xA2 is "¢" in windows-1252; the tests that load this resource expect
    # U+045E, so they rely on the <meta> declaration being used.
    body = b"<meta charset=windows-1251>\xA2\n"
    response.content = body
4 changes: 4 additions & 0 deletions html/syntax/charset/resources/bogus-charset-http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
def main(request, response):
    """Serve a bare 0xA2 byte as HTML with an unrecognized HTTP charset.

    The Content-Type header carries ``charset=this-is-not-a-charset`` and the
    body has no <meta charset>, only the byte 0xA2 followed by a newline.
    """
    content_type = b"text/html;charset=this-is-not-a-charset"
    response.headers.set(b"Content-Type", content_type)
    # 0xA2 is "¢" when decoded as windows-1252.
    body = b"\xA2\n"
    response.content = body
7 changes: 7 additions & 0 deletions html/syntax/charset/resources/bogus-charset.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!doctype html><meta charset=this-is-not-a-charset><script>
// Once this document has loaded, and only when the query string is exactly
// "?postMessage", send the body's text content to the parent window. The
// "*" target origin places no restriction on the parent's origin.
onload = () => {
if (location.search === "?postMessage") {
parent.postMessage(document.body.textContent, "*");
}
}
</script>¢

0 comments on commit a58bbf6

Please sign in to comment.