Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix DOMException caused by invalid attribute names when simplifying nested elements #918

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -538,10 +538,19 @@ Readability.prototype = {
) {
var child = node.children[0];
for (var i = 0; i < node.attributes.length; i++) {
child.setAttribute(
node.attributes[i].name,
node.attributes[i].value
);
try {
child.setAttribute(
node.attributes[i].name,
node.attributes[i].value
);
} catch (ex) {
/* setAttribute() can throw if the attribute name isn't a valid XML Name.
* Such attributes can nevertheless be parsed from source in HTML docs
* (see https://github.com/whatwg/html/issues/4275), so we may encounter
* them here, and copying them onto the child would throw. We don't care
* about such attributes, so we ignore the error and skip them.
*/
}
}
node.parentNode.replaceChild(child, node);
node = child;
Expand Down
54 changes: 46 additions & 8 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"mocha": "10.1.0",
"prettier": "^3.3.2",
"release-it": "17.0.1",
"sinon": "14.0.2"
"sinon": "14.0.2",
"xml-name-validator": "^5.0.0"
}
}
9 changes: 9 additions & 0 deletions test/test-pages/invalid-attributes/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"title": "Lorem Ipsum",
"byline": null,
"dir": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
"siteName": null,
"publishedTime": null,
"readerable": false
}
7 changes: 7 additions & 0 deletions test/test-pages/invalid-attributes/expected.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<div id="readability-page-1" class="page">
<div "="">
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
</p>
</div>
</div>
19 changes: 19 additions & 0 deletions test/test-pages/invalid-attributes/source.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<head>
<title>Lorem Ipsum</title>
</head>
<body>
<main>
<section>
<div "="">
<div>
<div>
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
</div>
</div>
</div>
</section>
</main>
</body>
</html>
26 changes: 15 additions & 11 deletions test/test-readability.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* eslint-env node, mocha */

var JSDOM = require("jsdom").JSDOM;
var xmlNameValidator = require("xml-name-validator").name;
var chai = require("chai");
var sinon = require("sinon");
chai.config.includeStack = true;
Expand Down Expand Up @@ -121,14 +122,17 @@ function runTestsWithItems(

function attributesForNode(node) {
return Array.from(node.attributes)
.filter(function (attr) {
return xmlNameValidator(attr.name);
})
.map(function (attr) {
return attr.name + "=" + attr.value;
})
.join(",");
});
}

var actualDOM = domGenerationFn(prettyPrint(result.content));
var expectedDOM = domGenerationFn(prettyPrint(expectedContent));

traverseDOM(
function (actualNode, expectedNode) {
if (actualNode && expectedNode) {
Expand All @@ -152,21 +156,21 @@ function runTestsWithItems(
}
// Compare attributes for element nodes:
} else if (actualNode.nodeType == 1) {
var actualNodeDesc = attributesForNode(actualNode);
var expectedNodeDesc = attributesForNode(expectedNode);
var actualNodeAttributes = attributesForNode(actualNode);
var expectedNodeAttributes = attributesForNode(expectedNode);
var desc =
"node " +
nodeStr(actualNode) +
" attributes (" +
actualNodeDesc +
actualNodeAttributes.join(",") +
") should match (" +
expectedNodeDesc +
") ";
expect(actualNode.attributes.length, desc).eql(
expectedNode.attributes.length
expectedNodeAttributes.join(",") +
") 1";
expect(actualNodeAttributes.length, desc).eql(
expectedNodeAttributes.length
);
for (var i = 0; i < actualNode.attributes.length; i++) {
var attr = actualNode.attributes[i].name;
for (var i = 0; i < actualNodeAttributes.length; i++) {
var attr = actualNodeAttributes[i].name;
var actualValue = actualNode.getAttribute(attr);
var expectedValue = expectedNode.getAttribute(attr);
expect(
Expand Down