Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit onNodeInserted + onNodeClosed for the root document #2182

Merged
merged 2 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
## 1.18.2 (Pending)

### Improvements
* The form associated elements returned by `FormElement.elements()` now reflect changes made to the DOM,

* The form associated elements returned by `FormElement.elements()` now reflect changes made to the DOM,
subsequently to the original parse. [2140](https://github.com/jhy/jsoup/issues/2140)
* In the `TreeBuilder`, the `onNodeInserted()` and `onNodeClosed()` events are now also fired for the outermost /
root `Document` node. This enables source position tracking on the Document node (which was previously unset). And
it also enables the node traversor to see the outer Document node. [2182](https://github.com/jhy/jsoup/pull/2182)

### Bug Fixes

Expand Down
9 changes: 8 additions & 1 deletion src/main/java/org/jsoup/parser/TreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void initialiseParse(Reader input, String baseUri, Parser parser) {
start = new Token.StartTag(this);
currentToken = start; // init current token to the virtual start token.
this.baseUri = baseUri;
onNodeInserted(doc);
}

void completeParse() {
Expand Down Expand Up @@ -108,7 +109,13 @@ void runParser() {
boolean stepParser() {
// if we have reached the end already, step by popping off the stack, to hit nodeRemoved callbacks:
if (currentToken.type == Token.TokenType.EOF) {
if (stack == null || stack.isEmpty()) return false; // stack will be null if TB was closed, as in case of runParser() + completeFragment()
if (stack == null) {
return false;
} if (stack.isEmpty()) {
onNodeClosed(doc); // the root doc is not on the stack, so let this final step close it
stack = null;
return true;
}
pop();
return true;
}
Expand Down
16 changes: 16 additions & 0 deletions src/test/java/org/jsoup/parser/PositionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,22 @@ private void printRange(Node node) {
assertEquals("class=\"On\"", attr.html());
}

@Test void tracksDocument() {
String html = "<!doctype html><title>Foo</title><p>Bar.";
Document doc = Jsoup.parse(html, TrackingHtmlParser);
StringBuilder track = new StringBuilder();
doc.forEachNode(node -> accumulatePositions(node, track));
assertEquals("#document:0-0~40-40; #doctype:0-15; html:15-15~40-40; head:15-15~33-33; title:15-22~15-33; #text:22-25; body:33-33~40-40; p:33-36~40-40; #text:36-40; ", track.toString());
}

@Test void tracksDocumentXml() {
String html = "<!doctype html><title>Foo</title><p>Bar.";
Document doc = Jsoup.parse(html, TrackingXmlParser);
StringBuilder track = new StringBuilder();
doc.forEachNode(node -> accumulatePositions(node, track));
assertEquals("#document:0-0~40-40; #doctype:0-15; title:15-22~25-33; #text:22-25; p:33-36~40-40; #text:36-40; ", track.toString());
}

@Test void updateKeyMaintainsRangeUc() {
String html = "<p xsi:CLASS=On>One</p>";
Document doc = Jsoup.parse(html, TrackingXmlParser);
Expand Down
20 changes: 10 additions & 10 deletions src/test/java/org/jsoup/parser/StreamParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void canStream() {
StringBuilder seen;
seen = new StringBuilder();
parser.stream().forEachOrdered(el -> trackSeen(el, seen));
assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;", seen.toString());
assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
}
}
Expand All @@ -48,7 +48,7 @@ void canStreamXml() {
StringBuilder seen;
seen = new StringBuilder();
parser.stream().forEachOrdered(el -> trackSeen(el, seen));
assertEquals("DIV#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];outmost;", seen.toString());
assertEquals("DIV#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];outmost;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
}
}
Expand All @@ -64,7 +64,7 @@ void canStreamXml() {
trackSeen(it.next(), seen);
}

assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;", seen.toString());
assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
}

Expand All @@ -75,13 +75,13 @@ void canStreamXml() {

StringBuilder seen = new StringBuilder();
parser.stream().forEach(el -> trackSeen(el, seen));
assertEquals("head+;p[One]+;p[Two];body;html;", seen.toString());
assertEquals("head+;p[One]+;p[Two];body;html;#root;", seen.toString());

String html2 = "<div>Three<div>Four</div></div>";
StringBuilder seen2 = new StringBuilder();
parser.parse(html2, "");
parser.stream().forEach(el -> trackSeen(el, seen2));
assertEquals("head+;div[Four];div[Three];body;html;", seen2.toString());
assertEquals("head+;div[Four];div[Three];body;html;#root;", seen2.toString());

// re-run without a new parse should be empty
StringBuilder seen3 = new StringBuilder();
Expand Down Expand Up @@ -247,7 +247,7 @@ static void trackSeen(Element el, StringBuilder actual) {
StreamParser streamer = basic();
assertFalse(isClosed(streamer));
long count = streamer.stream().count();
assertEquals(6, count);
assertEquals(7, count);

assertTrue(isClosed(streamer));
}
Expand All @@ -261,7 +261,7 @@ static void trackSeen(Element el, StringBuilder actual) {
it.next();
count++;
}
assertEquals(6, count);
assertEquals(7, count);
assertTrue(isClosed(streamer));
}

Expand Down Expand Up @@ -384,7 +384,7 @@ void canStreamFragment() {
try (StreamParser parser = new StreamParser(Parser.htmlParser()).parseFragment(html, context, "")) {
StringBuilder seen = new StringBuilder();
parser.stream().forEachOrdered(el -> trackSeen(el, seen));
assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;", seen.toString());
assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
// note that we don't get a full doc, just the fragment (and the context at the end of the stack)

Expand All @@ -405,7 +405,7 @@ void canStreamFragment() {
trackSeen(it.next(), seen);
}

assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;", seen.toString());
assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
// note that we don't get a full doc, just the fragment (and the context at the end of the stack)

Expand Down Expand Up @@ -451,7 +451,7 @@ void canStreamFragmentXml() throws IOException {
try (StreamParser parser = new StreamParser(Parser.xmlParser()).parseFragment(html, context, "")) {
StringBuilder seen = new StringBuilder();
parser.stream().forEachOrdered(el -> trackSeen(el, seen));
assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;", seen.toString());
assertEquals("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;#root;", seen.toString());
// checks expected order, and the + indicates that element had a next sibling at time of emission
// note that we don't get a full doc, just the fragment

Expand Down
4 changes: 3 additions & 1 deletion src/test/java/org/jsoup/safety/CleanerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,8 @@ public void bailsIfRemovingProtocolThatsNotSet() {
Element p = orig.expectFirst("p");
Range origRange = p.sourceRange();
assertEquals("2,2:22-2,10:30", origRange.toString());
assertEquals("1,1:0-1,1:0", orig.sourceRange().toString());
assertEquals("2,19:39-2,19:39", orig.endSourceRange().toString());

Range.AttributeRange attributeRange = p.attributes().sourceRange("id");
assertEquals("2,5:25-2,7:27=2,8:28-2,9:29", attributeRange.toString());
Expand All @@ -404,7 +406,7 @@ public void bailsIfRemovingProtocolThatsNotSet() {
assertEquals("1", cleanP.id());
Range cleanRange = cleanP.sourceRange();
assertEquals(origRange, cleanRange);
assertEquals(orig.endSourceRange(), clean.endSourceRange());
assertEquals(p.endSourceRange(), cleanP.endSourceRange());
assertEquals(attributeRange, cleanP.attributes().sourceRange("id"));
}

Expand Down
Loading