From e1b023b9633d56a33821603f5433459add50d75b Mon Sep 17 00:00:00 2001 From: Jarek Radosz Date: Fri, 28 Jun 2019 17:52:54 +0200 Subject: [PATCH 1/4] =?UTF-8?q?Don=E2=80=99t=20strip=20a=20newline=20after?= =?UTF-8?q?=20closing=20pre/textarea=20tags?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/evented-tokenizer.ts | 25 +++++++++++++++++++++++-- src/generated/tokenizer-states.ts | 1 + tests/tokenizer-tests.ts | 5 +++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts index 51d6512..99620c0 100644 --- a/src/evented-tokenizer.ts +++ b/src/evented-tokenizer.ts @@ -22,6 +22,7 @@ export default class EventedTokenizer { reset() { this.transitionTo(TokenizerState.beforeData); this.input = ''; + this.tagNameBuffer = ''; this.index = 0; this.line = 1; @@ -124,10 +125,12 @@ export default class EventedTokenizer { } else { if (char === '\n') { let tag = this.tagNameBuffer.toLowerCase(); + if (tag === 'pre' || tag === 'textarea') { this.consume(); } } + this.transitionTo(TokenizerState.data); this.delegate.beginData(); } @@ -168,7 +171,7 @@ export default class EventedTokenizer { markupDeclarationOpen() { let char = this.consume(); - if (char === '-' && this.input.charAt(this.index) === '-') { + if (char === '-' && this.peek() === '-') { this.consume(); this.transitionTo(TokenizerState.commentStart); this.delegate.beginComment(); @@ -251,6 +254,24 @@ export default class EventedTokenizer { } }, + endTagName() { + let char = this.consume(); + + if (isSpace(char)) { + this.transitionTo(TokenizerState.beforeAttributeName); + this.tagNameBuffer = ''; + } else if (char === '/') { + this.transitionTo(TokenizerState.selfClosingStartTag); + this.tagNameBuffer = ''; + } else if (char === '>') { + this.delegate.finishTag(); + this.transitionTo(TokenizerState.beforeData); + this.tagNameBuffer = ''; + } else { + this.appendToTagName(char); + } + }, + beforeAttributeName() 
{ let char = this.peek(); @@ -453,7 +474,7 @@ export default class EventedTokenizer { let char = this.consume(); if (char === '@' || char === ':' || isAlpha(char)) { - this.transitionTo(TokenizerState.tagName); + this.transitionTo(TokenizerState.endTagName); this.tagNameBuffer = ''; this.delegate.beginEndTag(); this.appendToTagName(char); diff --git a/src/generated/tokenizer-states.ts b/src/generated/tokenizer-states.ts index 747c277..1c52cb1 100644 --- a/src/generated/tokenizer-states.ts +++ b/src/generated/tokenizer-states.ts @@ -12,6 +12,7 @@ export const enum TokenizerState { tagOpen = 'tagOpen', endTagOpen = 'endTagOpen', tagName = 'tagName', + endTagName = 'endTagName', rcdataLessThanSign = 'rcdataLessThanSign', rcdataEndTagOpen = 'rcdataEndTagOpen', rcdataEndTagName = 'rcdataEndTagName', diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts index 17fc894..1ea0141 100644 --- a/tests/tokenizer-tests.ts +++ b/tests/tokenizer-tests.ts @@ -205,6 +205,11 @@ QUnit.test('A newline immediately following a
<pre> tag is stripped', function(a
   assert.deepEqual(tokens, [startTag('pre'), chars('hello'), endTag('pre')]);
 });
 
+QUnit.test('A newline immediately following a closing <pre> tag is not stripped', function(assert) {
+  let tokens = tokenize("\n<pre>\nhello</pre>\n");
+  assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('hello'), endTag('pre'), chars('\n')]);
+});
+
 // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
 QUnit.test('A newline immediately following a <textarea> tag is stripped', function(assert) {
   let tokens = tokenize("<textarea>\nhello</textarea>
"); From 5950786ece55f8b7887dbec4683e8c845493def7 Mon Sep 17 00:00:00 2001 From: Jarek Radosz Date: Wed, 26 Jun 2019 17:28:45 +0200 Subject: [PATCH 2/4] =?UTF-8?q?Make=20sure=20title,=20style,=20and=20scrip?= =?UTF-8?q?t=20elements=E2=80=99=20contents=20are=20text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/evented-tokenizer.ts | 12 ++++++++++-- tests/tokenizer-tests.ts | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts index 99620c0..a44ed77 100644 --- a/src/evented-tokenizer.ts +++ b/src/evented-tokenizer.ts @@ -114,11 +114,19 @@ export default class EventedTokenizer { this.delegate.appendToTagName(char); } + private isIgnoredEndTag(): boolean { + let tag = this.tagNameBuffer.toLowerCase(); + + return (tag === 'title' && this.input.substr(this.index, 8) !== '</title>') || + (tag === 'style' && this.input.substr(this.index, 8) !== '</style>') || + (tag === 'script' && this.input.substr(this.index, 9) !== '</script>'); + } + states: { [k in TokenizerState]?: (this: EventedTokenizer) => void } = { beforeData() { let char = this.peek(); - if (char === '<') { + if (char === '<' && !this.isIgnoredEndTag()) { this.transitionTo(TokenizerState.tagOpen); this.markTagStart(); this.consume(); @@ -139,7 +147,7 @@ export default class EventedTokenizer { data() { let char = this.peek(); - if (char === '<') { + if (char === '<' && !this.isIgnoredEndTag()) { this.delegate.finishData(); this.transitionTo(TokenizerState.tagOpen); this.markTagStart(); diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts index 1ea0141..9c3983a 100644 --- a/tests/tokenizer-tests.ts +++ b/tests/tokenizer-tests.ts @@ -222,6 +222,38 @@ QUnit.test('A newline immediately following a