From e1b023b9633d56a33821603f5433459add50d75b Mon Sep 17 00:00:00 2001
From: Jarek Radosz <jradosz@gmail.com>
Date: Fri, 28 Jun 2019 17:52:54 +0200
Subject: [PATCH 1/4] =?UTF-8?q?Don=E2=80=99t=20strip=20a=20newline=20after?=
 =?UTF-8?q?=20closing=20pre/textarea=20tags?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/evented-tokenizer.ts          | 25 +++++++++++++++++++++++--
 src/generated/tokenizer-states.ts |  1 +
 tests/tokenizer-tests.ts          |  5 +++++
 3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts
index 51d6512..99620c0 100644
--- a/src/evented-tokenizer.ts
+++ b/src/evented-tokenizer.ts
@@ -22,6 +22,7 @@ export default class EventedTokenizer {
   reset() {
     this.transitionTo(TokenizerState.beforeData);
     this.input = '';
+    this.tagNameBuffer = '';
 
     this.index = 0;
     this.line = 1;
@@ -124,10 +125,12 @@ export default class EventedTokenizer {
       } else {
         if (char === '\n') {
           let tag = this.tagNameBuffer.toLowerCase();
+
           if (tag === 'pre' || tag === 'textarea') {
             this.consume();
           }
         }
+
         this.transitionTo(TokenizerState.data);
         this.delegate.beginData();
       }
@@ -168,7 +171,7 @@ export default class EventedTokenizer {
     markupDeclarationOpen() {
       let char = this.consume();
 
-      if (char === '-' && this.input.charAt(this.index) === '-') {
+      if (char === '-' && this.peek() === '-') {
         this.consume();
         this.transitionTo(TokenizerState.commentStart);
         this.delegate.beginComment();
@@ -251,6 +254,24 @@ export default class EventedTokenizer {
       }
     },
 
+    endTagName() { // reads the name of a closing tag; `endTagOpen` transitions here
+      let char = this.consume();
+      let nameEnded = true; // cleared only when `char` extends the name
+
+      if (isSpace(char)) {
+        this.transitionTo(TokenizerState.beforeAttributeName);
+      } else if (char === '/') {
+        this.transitionTo(TokenizerState.selfClosingStartTag);
+      } else if (char === '>') {
+        this.delegate.finishTag();
+        this.transitionTo(TokenizerState.beforeData);
+      } else {
+        nameEnded = false;
+        this.appendToTagName(char);
+      }
+      if (nameEnded) { this.tagNameBuffer = ''; } // so `beforeData` keeps a "\n" that follows </pre> or </textarea>
+    },
+
     beforeAttributeName() {
       let char = this.peek();
 
@@ -453,7 +474,7 @@ export default class EventedTokenizer {
       let char = this.consume();
 
       if (char === '@' || char === ':' || isAlpha(char)) {
-        this.transitionTo(TokenizerState.tagName);
+        this.transitionTo(TokenizerState.endTagName);
         this.tagNameBuffer = '';
         this.delegate.beginEndTag();
         this.appendToTagName(char);
diff --git a/src/generated/tokenizer-states.ts b/src/generated/tokenizer-states.ts
index 747c277..1c52cb1 100644
--- a/src/generated/tokenizer-states.ts
+++ b/src/generated/tokenizer-states.ts
@@ -12,6 +12,7 @@ export const enum TokenizerState {
   tagOpen = 'tagOpen',
   endTagOpen = 'endTagOpen',
   tagName = 'tagName',
+  endTagName = 'endTagName',
   rcdataLessThanSign = 'rcdataLessThanSign',
   rcdataEndTagOpen = 'rcdataEndTagOpen',
   rcdataEndTagName = 'rcdataEndTagName',
diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts
index 17fc894..1ea0141 100644
--- a/tests/tokenizer-tests.ts
+++ b/tests/tokenizer-tests.ts
@@ -205,6 +205,11 @@ QUnit.test('A newline immediately following a <pre> tag is stripped', function(a
   assert.deepEqual(tokens, [startTag('pre'), chars('hello'), endTag('pre')]);
 });
 
+QUnit.test('A newline immediately following a closing </pre> tag is not stripped', function(assert) {
+  let tokens = tokenize("\n<pre>\nhello</pre>\n"); // only the "\n" right after the opening <pre> may be dropped
+  assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('hello'), endTag('pre'), chars('\n')]);
+});
+
 // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
 QUnit.test('A newline immediately following a <PRE> tag is stripped', function(assert) {
   let tokens = tokenize("<PRE>\nhello</PRE>");

From 5950786ece55f8b7887dbec4683e8c845493def7 Mon Sep 17 00:00:00 2001
From: Jarek Radosz <jradosz@gmail.com>
Date: Wed, 26 Jun 2019 17:28:45 +0200
Subject: [PATCH 2/4] =?UTF-8?q?Make=20sure=20title,=20style,=20and=20scrip?=
 =?UTF-8?q?t=20elements=E2=80=99=20contents=20are=20text?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/evented-tokenizer.ts | 12 ++++++++++--
 tests/tokenizer-tests.ts | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts
index 99620c0..a44ed77 100644
--- a/src/evented-tokenizer.ts
+++ b/src/evented-tokenizer.ts
@@ -114,11 +114,19 @@ export default class EventedTokenizer {
     this.delegate.appendToTagName(char);
   }
 
+  private isIgnoredEndTag(): boolean {
+    let tag = this.tagNameBuffer.toLowerCase();
+
+    return (tag === 'title' && this.input.substr(this.index, 8) !== '</title>') ||
+      (tag === 'style' && this.input.substr(this.index, 8) !== '</style>') ||
+      (tag === 'script' && this.input.substr(this.index, 9) !== '</script>');
+  }
+
   states: { [k in TokenizerState]?: (this: EventedTokenizer) => void } = {
     beforeData() {
       let char = this.peek();
 
-      if (char === '<') {
+      if (char === '<' && !this.isIgnoredEndTag()) {
         this.transitionTo(TokenizerState.tagOpen);
         this.markTagStart();
         this.consume();
@@ -139,7 +147,7 @@ export default class EventedTokenizer {
     data() {
       let char = this.peek();
 
-      if (char === '<') {
+      if (char === '<' && !this.isIgnoredEndTag()) {
         this.delegate.finishData();
         this.transitionTo(TokenizerState.tagOpen);
         this.markTagStart();
diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts
index 1ea0141..9c3983a 100644
--- a/tests/tokenizer-tests.ts
+++ b/tests/tokenizer-tests.ts
@@ -222,6 +222,38 @@ QUnit.test('A newline immediately following a <textarea> tag is stripped', funct
   assert.deepEqual(tokens, [startTag('textarea'), chars('hello'), endTag('textarea')]);
 });
 
+// https://html.spec.whatwg.org/multipage/semantics.html#the-title-element
+QUnit.test('The title element content is always text', function(assert) {
+  let tokens = tokenize("<title>&quot;hey <b>there</b><!-- comment --></title>"); // <b> and the comment must come back as characters, not tags
+  assert.deepEqual(tokens, [startTag('title'), chars('"hey <b>there</b><!-- comment -->'), endTag('title')]); // &quot; is still decoded to "
+});
+
+// https://html.spec.whatwg.org/multipage/semantics.html#the-style-element
+QUnit.test('The style element content is always text', function(assert) {
+  let tokens = tokenize("<style>&quot;hey <b>there</b><!-- comment --></style>");
+  assert.deepEqual(tokens, [startTag('style'), chars('"hey <b>there</b><!-- comment -->'), endTag('style')]);
+});
+
+// https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
+QUnit.test('The script element content restrictions', function(assert) {
+  let tokens = tokenize("<script>&quot;hey <b>there</b><!-- comment --></script>");
+  assert.deepEqual(tokens, [startTag('script'), chars('"hey <b>there</b><!-- comment -->'), endTag('script')]);
+});
+
+QUnit.test('Two following script tags', function(assert) {
+  let tokens = tokenize("<script><!-- comment --></script> <script>second</script>"); // text mode must end at the first </script>
+
+  assert.deepEqual(tokens, [
+    startTag('script'),
+    chars('<!-- comment -->'), // comment syntax inside <script> is plain character data
+    endTag('script'),
+    chars(' '),
+    startTag('script'), // tokenized as a real tag again
+    chars('second'),
+    endTag('script')
+  ]);
+});
+
 // https://github.com/emberjs/rfcs/blob/master/text/0311-angle-bracket-invocation.md#dynamic-invocations
 QUnit.test('An Emberish named arg invocation', function(assert) {
   let tokens = tokenize('<@foo></@foo>');

From ae8ef147f173c812c55b9c8c2da75996ccd69210 Mon Sep 17 00:00:00 2001
From: Jarek Radosz <jradosz@gmail.com>
Date: Thu, 11 Jul 2019 19:43:16 +0200
Subject: [PATCH 3/4] Replace `substr` with `substring`

---
 src/evented-tokenizer.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts
index a44ed77..036bc52 100644
--- a/src/evented-tokenizer.ts
+++ b/src/evented-tokenizer.ts
@@ -117,9 +117,9 @@ export default class EventedTokenizer {
   private isIgnoredEndTag(): boolean {
     let tag = this.tagNameBuffer.toLowerCase();
 
-    return (tag === 'title' && this.input.substr(this.index, 8) !== '</title>') ||
-      (tag === 'style' && this.input.substr(this.index, 8) !== '</style>') ||
-      (tag === 'script' && this.input.substr(this.index, 9) !== '</script>');
+    // Inside title/style/script a `<` is text unless it opens the matching end tag (any letter case, e.g. </SCRIPT>).
+    if (tag !== 'title' && tag !== 'style' && tag !== 'script') { return false; }
+    return this.input.substring(this.index, this.index + tag.length + 3).toLowerCase() !== `</${tag}>`;
   }
 
   states: { [k in TokenizerState]?: (this: EventedTokenizer) => void } = {

From 627659c69b87c8a53bcb7223917fdc0aed6bfce7 Mon Sep 17 00:00:00 2001
From: Jarek Radosz <jradosz@gmail.com>
Date: Thu, 11 Jul 2019 20:54:54 +0200
Subject: [PATCH 4/4] =?UTF-8?q?Don=E2=80=99t=20transform=20character=20ent?=
 =?UTF-8?q?ity=20references?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/evented-tokenizer.ts | 3 ++-
 tests/tokenizer-tests.ts | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts
index 036bc52..80d0b4b 100644
--- a/src/evented-tokenizer.ts
+++ b/src/evented-tokenizer.ts
@@ -146,13 +146,14 @@ export default class EventedTokenizer {
 
     data() {
       let char = this.peek();
+      let tag = this.tagNameBuffer.toLowerCase();
 
       if (char === '<' && !this.isIgnoredEndTag()) {
         this.delegate.finishData();
         this.transitionTo(TokenizerState.tagOpen);
         this.markTagStart();
         this.consume();
-      } else if (char === '&') {
+      } else if (char === '&' && tag !== 'script' && tag !== 'style') {
         this.consume();
         this.delegate.appendToData(this.consumeCharRef() || '&');
       } else {
diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts
index 9c3983a..98538f1 100644
--- a/tests/tokenizer-tests.ts
+++ b/tests/tokenizer-tests.ts
@@ -231,13 +231,13 @@ QUnit.test('The title element content is always text', function(assert) {
 // https://html.spec.whatwg.org/multipage/semantics.html#the-style-element
 QUnit.test('The style element content is always text', function(assert) {
   let tokens = tokenize("<style>&quot;hey <b>there</b><!-- comment --></style>");
-  assert.deepEqual(tokens, [startTag('style'), chars('"hey <b>there</b><!-- comment -->'), endTag('style')]);
+  assert.deepEqual(tokens, [startTag('style'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('style')]);
 });
 
 // https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
 QUnit.test('The script element content restrictions', function(assert) {
   let tokens = tokenize("<script>&quot;hey <b>there</b><!-- comment --></script>");
-  assert.deepEqual(tokens, [startTag('script'), chars('"hey <b>there</b><!-- comment -->'), endTag('script')]);
+  assert.deepEqual(tokens, [startTag('script'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('script')]);
 });
 
 QUnit.test('Two following script tags', function(assert) {