+
Release 1.15.4 [18-Feb-2023]
* Improvement: added the ability to escape CSS selectors (tags, IDs, classes) to match elements that don't follow
regular CSS syntax. For example, to match by classname <p class="one.two">, use document.select("p.one\\.two");
diff --git a/src/main/java/org/jsoup/parser/CharacterReader.java b/src/main/java/org/jsoup/parser/CharacterReader.java
index df902b1684..1d00ec6ffb 100644
--- a/src/main/java/org/jsoup/parser/CharacterReader.java
+++ b/src/main/java/org/jsoup/parser/CharacterReader.java
@@ -116,6 +116,11 @@ public int pos() {
return readerPos + bufPos;
}
+ /**
+  Tests if the buffer has been fully read.
+  @return the current value of the {@code readFully} flag
+  */
+ boolean readFully() {
+ return readFully;
+ }
+
/**
Enables or disables line number tracking. By default, will be off. Tracking line numbers improves the
legibility of parser error messages, for example. Tracking should be enabled before any content is read to be of
diff --git a/src/main/java/org/jsoup/parser/TokeniserState.java b/src/main/java/org/jsoup/parser/TokeniserState.java
index 874fed0c05..f269fc626c 100644
--- a/src/main/java/org/jsoup/parser/TokeniserState.java
+++ b/src/main/java/org/jsoup/parser/TokeniserState.java
@@ -186,7 +186,7 @@ void read(Tokeniser t, CharacterReader r) {
if (r.matches('/')) {
t.createTempBuffer();
t.advanceTransition(RCDATAEndTagOpen);
- } else if (r.matchesAsciiAlpha() && t.appropriateEndTagName() != null && !r.containsIgnoreCase(t.appropriateEndTagSeq())) {
+ } else if (r.readFully() && r.matchesAsciiAlpha() && t.appropriateEndTagName() != null && !r.containsIgnoreCase(t.appropriateEndTagSeq())) {
// diverge from spec: got a start tag, but there's no appropriate end tag (e.g. </title>), so rather than
// consuming to EOF; break out here
t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName());
diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java
index 43475ce847..55f506562f 100644
--- a/src/test/java/org/jsoup/parser/HtmlParserTest.java
+++ b/src/test/java/org/jsoup/parser/HtmlParserTest.java
@@ -1732,4 +1732,20 @@ private boolean didAddElements(String input) {
//assertEquals("OneTwo", doc.expectFirst("body > div").text());
System.out.println(doc.html());
}
+
+ /**
+  Regression test for https://github.com/jhy/jsoup/issues/1929: the contents of a large textarea that
+  contain tag-like text must be returned in full as the textarea's text.
+  */
+ @Test void largeTextareaContents() {
+     // https://github.com/jhy/jsoup/issues/1929
+     StringBuilder sb = new StringBuilder();
+     int num = 2000;
+     for (int i = 0; i <= num; i++) {
+         // the content must contain '<' followed by an ASCII letter so that the tokeniser's
+         // "start tag seen while in RCDATA" branch is actually exercised
+         sb.append("\n<text>foo</text>\n");
+     }
+     String textContent = sb.toString();
+     // wrap the generated content in a textarea; without the element, expectFirst("textarea") finds nothing
+     String sourceHtml = "<textarea>" + textContent + "</textarea>";
+
+     Document doc = Jsoup.parse(sourceHtml);
+     Element textArea = doc.expectFirst("textarea");
+
+     // the whole (un-normalised) text must round-trip exactly, including the tag-like text
+     assertEquals(textContent, textArea.wholeText());
+ }
}