chore: improve parser performance (#8303)

- Add a fast path for the common case of attribute values delimited by quote marks.
- All regexes that are passed exclusively into `read` or `match_regex` (which only succeed if the match is at the beginning) are altered so that the regex has this condition built in via a `^` anchor, preventing it from searching past the start index.

Co-authored-by: Yuichiro Yamashita <[email protected]>
sveltejs · Feb 22, 2023 · a71b8b9 · a71b8b9
1 parent e3e912a
commit a71b8b9
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 13 deletions.
diff --git a/src/compiler/parse/index.ts b/src/compiler/parse/index.ts
@@ -132,6 +132,10 @@ export class Parser {
 		return this.template.slice(this.index, this.index + str.length) === str;
 	}
 
+	/**
+	 * Match a regex at the current index
+	 * @param pattern Should start with a ^ anchor; without it, a regex that fails to match at the current index keeps searching through the rest of the template, which hurts performance
+	 */
 	match_regex(pattern: RegExp) {
 		const match = pattern.exec(this.template.slice(this.index));
 		if (!match || match.index !== 0) return null;
@@ -148,6 +152,10 @@ export class Parser {
 		}
 	}
 
+	/**
+	 * Match a regex at the current index; if it matches, advance the index past the match and return the result
+	 * @param pattern Should start with a ^ anchor; without it, a regex that fails to match at the current index keeps searching through the rest of the template, which hurts performance
+	 */
 	read(pattern: RegExp) {
 		const result = this.match_regex(pattern);
 		if (result) this.index += result.length;

diff --git a/src/compiler/parse/read/script.ts b/src/compiler/parse/read/script.ts
@@ -6,6 +6,7 @@ import parser_errors from '../errors';
 import { regex_not_newline_characters } from '../../utils/patterns';
 
 const regex_closing_script_tag = /<\/script\s*>/;
+const regex_starts_with_closing_script_tag = /^<\/script\s*>/;
 
 function get_context(parser: Parser, attributes: any[], start: number): string {
 	const context = attributes.find(attribute => attribute.name === 'context');
@@ -32,7 +33,7 @@ export default function read_script(parser: Parser, start: number, attributes: N
 	}
 
 	const source = parser.template.slice(0, script_start).replace(regex_not_newline_characters, ' ') + data;
-	parser.read(regex_closing_script_tag);
+	parser.read(regex_starts_with_closing_script_tag);
 
 	let ast: Program;
 

diff --git a/src/compiler/parse/read/style.ts b/src/compiler/parse/read/style.ts
@@ -7,6 +7,7 @@ import { Style } from '../../interfaces';
 import parser_errors from '../errors';
 
 const regex_closing_style_tag = /<\/style\s*>/;
+const regex_starts_with_closing_style_tag = /^<\/style\s*>/;
 
 export default function read_style(parser: Parser, start: number, attributes: Node[]): Style {
 	const content_start = parser.index;
@@ -21,7 +22,7 @@ export default function read_style(parser: Parser, start: number, attributes: No
 
 	// discard styles when css is disabled
 	if (parser.css_mode === 'none') {
-		parser.read(regex_closing_style_tag);
+		parser.read(regex_starts_with_closing_style_tag);
 		return null;
 	}
 
@@ -76,7 +77,7 @@ export default function read_style(parser: Parser, start: number, attributes: No
 		}
 	});
 
-	parser.read(regex_closing_style_tag);
+	parser.read(regex_starts_with_closing_style_tag);
 
 	const end = parser.index;
 

diff --git a/src/compiler/parse/state/mustache.ts b/src/compiler/parse/state/mustache.ts
@@ -33,7 +33,7 @@ function trim_whitespace(block: TemplateNode, trim_before: boolean, trim_after:
 	}
 }
 
-const regex_whitespace_with_closing_curly_brace = /\s*}/;
+const regex_whitespace_with_closing_curly_brace = /^\s*}/;
 
 export default function mustache(parser: Parser) {
 	const start = parser.index;

diff --git a/src/compiler/parse/state/tag.ts b/src/compiler/parse/state/tag.ts
@@ -12,6 +12,9 @@ import { closing_tag_omitted, decode_character_references } from '../utils/html'
 // eslint-disable-next-line no-useless-escape
 const valid_tag_name = /^\!?[a-zA-Z]{1,}:?[a-zA-Z0-9\-]*/;
 
+/** Invalid attribute characters if the attribute is not surrounded by quotes */
+const regex_starts_with_invalid_attr_value = /^(\/>|[\s"'=<>`])/;
+
 const meta_tags = new Map([
 	['svelte:head', 'Head'],
 	['svelte:options', 'Options'],
@@ -293,7 +296,7 @@ function read_tag_name(parser: Parser) {
 
 // eslint-disable-next-line no-useless-escape
 const regex_token_ending_character = /[\s=\/>"']/;
-const regex_quote_characters = /["']/;
+const regex_starts_with_quote_characters = /^["']/;
 
 function read_attribute(parser: Parser, unique_names: Set<string>) {
 	const start = parser.index;
@@ -368,7 +371,7 @@ function read_attribute(parser: Parser, unique_names: Set<string>) {
 		parser.allow_whitespace();
 		value = read_attribute_value(parser);
 		end = parser.index;
-	} else if (parser.match_regex(regex_quote_characters)) {
+	} else if (parser.match_regex(regex_starts_with_quote_characters)) {
 		parser.error(parser_errors.unexpected_token('='), parser.index);
 	}
 
@@ -475,15 +478,13 @@ function read_attribute_value(parser: Parser) {
 		}];
 	}
 
-	const regex = (
-		quote_mark === "'" ? /'/ :
-			quote_mark === '"' ? /"/ :
-				/(\/>|[\s"'=<>`])/
-	);
-
 	let value;
 	try {
-		value = read_sequence(parser, () => !!parser.match_regex(regex), 'in attribute value');
+		value = read_sequence(parser, () => {
+			// handle common case of quote marks existing outside of regex for performance reasons
+			if (quote_mark) return parser.match(quote_mark);
+			return !!parser.match_regex(regex_starts_with_invalid_attr_value);
+		}, 'in attribute value');
 	} catch (error) {
 		if (error.code === 'parse-error') {
 			// if the attribute value didn't close + self-closing tag