From 51f79565c38c6a541ac1d28c10ad3dbb518cfb11 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Fri, 20 Apr 2018 20:45:13 +0200 Subject: [PATCH] refactor(parser): merge lexer with parser & optimize parser --- src/lexer.js | 425 ---------------------- src/parser.js | 836 ++++++++++++++++++++++++++++++++------------ test/parser.spec.js | 53 ++- 3 files changed, 672 insertions(+), 642 deletions(-) delete mode 100644 src/lexer.js diff --git a/src/lexer.js b/src/lexer.js deleted file mode 100644 index 447c1228..00000000 --- a/src/lexer.js +++ /dev/null @@ -1,425 +0,0 @@ -export class Token { - constructor(index, text) { - this.index = index; - this.text = text; - } - - withOp(op) { - this.opKey = op; - return this; - } - - withGetterSetter(key) { - this.key = key; - return this; - } - - withValue(value) { - this.value = value; - return this; - } - - toString() { - return `Token(${this.text})`; - } -} - -export class Lexer { - lex(text) { - let scanner = new Scanner(text); - let tokens = []; - let token = scanner.scanToken(); - - while (token) { - tokens.push(token); - token = scanner.scanToken(); - } - - return tokens; - } -} - -export class Scanner { - constructor(input) { - this.input = input; - this.length = input.length; - this.peek = 0; - this.index = -1; - - this.advance(); - } - - scanToken() { - // Skip whitespace. - while (this.peek <= $SPACE) { - if (++this.index >= this.length) { - this.peek = $EOF; - return null; - } - - this.peek = this.input.charCodeAt(this.index); - } - - // Handle identifiers and numbers. - if (isIdentifierStart(this.peek)) { - return this.scanIdentifier(); - } - - if (isDigit(this.peek)) { - return this.scanNumber(this.index); - } - - let start = this.index; - - switch (this.peek) { - case $PERIOD: - this.advance(); - return isDigit(this.peek) ? 
this.scanNumber(start) : new Token(start, '.'); - case $LPAREN: - case $RPAREN: - case $LBRACE: - case $RBRACE: - case $LBRACKET: - case $RBRACKET: - case $COMMA: - case $COLON: - case $SEMICOLON: - return this.scanCharacter(start, String.fromCharCode(this.peek)); - case $SQ: - case $DQ: - return this.scanString(); - case $PLUS: - case $MINUS: - case $STAR: - case $SLASH: - case $PERCENT: - case $CARET: - case $QUESTION: - return this.scanOperator(start, String.fromCharCode(this.peek)); - case $LT: - case $GT: - case $BANG: - case $EQ: - return this.scanComplexOperator(start, $EQ, String.fromCharCode(this.peek), '='); - case $AMPERSAND: - return this.scanComplexOperator(start, $AMPERSAND, '&', '&'); - case $BAR: - return this.scanComplexOperator(start, $BAR, '|', '|'); - case $NBSP: - while (isWhitespace(this.peek)) { - this.advance(); - } - - return this.scanToken(); - // no default - } - - let character = String.fromCharCode(this.peek); - this.error(`Unexpected character [${character}]`); - return null; - } - - scanCharacter(start, text) { - assert(this.peek === text.charCodeAt(0)); - this.advance(); - return new Token(start, text); - } - - scanOperator(start, text) { - assert(this.peek === text.charCodeAt(0)); - assert(OPERATORS[text] === 1); - this.advance(); - return new Token(start, text).withOp(text); - } - - scanComplexOperator(start, code, one, two) { - assert(this.peek === one.charCodeAt(0)); - this.advance(); - - let text = one; - - if (this.peek === code) { - this.advance(); - text += two; - } - - if (this.peek === code) { - this.advance(); - text += two; - } - - assert(OPERATORS[text] === 1); - - return new Token(start, text).withOp(text); - } - - scanIdentifier() { - assert(isIdentifierStart(this.peek)); - let start = this.index; - - this.advance(); - - while (isIdentifierPart(this.peek)) { - this.advance(); - } - - let text = this.input.substring(start, this.index); - let result = new Token(start, text); - - // TODO(kasperl): Deal with null, 
undefined, true, and false in - // a cleaner and faster way. - if (OPERATORS[text] === 1) { - result.withOp(text); - } else { - result.withGetterSetter(text); - } - - return result; - } - - scanNumber(start) { - assert(isDigit(this.peek)); - let simple = (this.index === start); - this.advance(); // Skip initial digit. - - while (true) { // eslint-disable-line no-constant-condition - if (!isDigit(this.peek)) { - if (this.peek === $PERIOD) { - simple = false; - } else if (isExponentStart(this.peek)) { - this.advance(); - - if (isExponentSign(this.peek)) { - this.advance(); - } - - if (!isDigit(this.peek)) { - this.error('Invalid exponent', -1); - } - - simple = false; - } else { - break; - } - } - - this.advance(); - } - - let text = this.input.substring(start, this.index); - let value = simple ? parseInt(text, 10) : parseFloat(text); - return new Token(start, text).withValue(value); - } - - scanString() { - assert(this.peek === $SQ || this.peek === $DQ); - - let start = this.index; - let quote = this.peek; - - this.advance(); // Skip initial quote. - - let buffer; - let marker = this.index; - - while (this.peek !== quote) { - if (this.peek === $BACKSLASH) { - if (!buffer) { - buffer = []; - } - - buffer.push(this.input.substring(marker, this.index)); - this.advance(); - - let unescaped; - - if (this.peek === $u) { - // TODO(kasperl): Check bounds? Make sure we have test - // coverage for this. 
- let hex = this.input.substring(this.index + 1, this.index + 5); - - if (!/[A-Z0-9]{4}/.test(hex)) { - this.error(`Invalid unicode escape [\\u${hex}]`); - } - - unescaped = parseInt(hex, 16); - - for (let i = 0; i < 5; ++i) { - this.advance(); - } - } else { - unescaped = unescape(this.peek); - this.advance(); - } - - buffer.push(String.fromCharCode(unescaped)); - marker = this.index; - } else if (this.peek === $EOF) { - this.error('Unterminated quote'); - } else { - this.advance(); - } - } - - let last = this.input.substring(marker, this.index); - this.advance(); // Skip terminating quote. - let text = this.input.substring(start, this.index); - - // Compute the unescaped string value. - let unescaped = last; - - if (buffer !== null && buffer !== undefined) { - buffer.push(last); - unescaped = buffer.join(''); - } - - return new Token(start, text).withValue(unescaped); - } - - advance() { - if (++this.index >= this.length) { - this.peek = $EOF; - } else { - this.peek = this.input.charCodeAt(this.index); - } - } - - error(message, offset = 0) { - // TODO(kasperl): Try to get rid of the offset. It is only used to match - // the error expectations in the lexer tests for numbers with exponents. 
- let position = this.index + offset; - throw new Error(`Lexer Error: ${message} at column ${position} in expression [${this.input}]`); - } -} - -const OPERATORS = { - 'undefined': 1, - 'null': 1, - 'true': 1, - 'false': 1, - '+': 1, - '-': 1, - '*': 1, - '/': 1, - '%': 1, - '^': 1, - '=': 1, - '==': 1, - '===': 1, - '!=': 1, - '!==': 1, - '<': 1, - '>': 1, - '<=': 1, - '>=': 1, - '&&': 1, - '||': 1, - '&': 1, - '|': 1, - '!': 1, - '?': 1 -}; - -const $EOF = 0; -const $TAB = 9; -const $LF = 10; -const $VTAB = 11; -const $FF = 12; -const $CR = 13; -const $SPACE = 32; -const $BANG = 33; -const $DQ = 34; -const $$ = 36; -const $PERCENT = 37; -const $AMPERSAND = 38; -const $SQ = 39; -const $LPAREN = 40; -const $RPAREN = 41; -const $STAR = 42; -const $PLUS = 43; -const $COMMA = 44; -const $MINUS = 45; -const $PERIOD = 46; -const $SLASH = 47; -const $COLON = 58; -const $SEMICOLON = 59; -const $LT = 60; -const $EQ = 61; -const $GT = 62; -const $QUESTION = 63; - -const $0 = 48; -const $9 = 57; - -const $A = 65; -const $E = 69; -const $Z = 90; - -const $LBRACKET = 91; -const $BACKSLASH = 92; -const $RBRACKET = 93; -const $CARET = 94; -const $_ = 95; - -const $a = 97; -const $e = 101; -const $f = 102; -const $n = 110; -const $r = 114; -const $t = 116; -const $u = 117; -const $v = 118; -const $z = 122; - -const $LBRACE = 123; -const $BAR = 124; -const $RBRACE = 125; -const $NBSP = 160; - -function isWhitespace(code) { - return (code >= $TAB && code <= $SPACE) || (code === $NBSP); -} - -function isIdentifierStart(code) { - return ($a <= code && code <= $z) - || ($A <= code && code <= $Z) - || (code === $_) - || (code === $$); -} - -function isIdentifierPart(code) { - return ($a <= code && code <= $z) - || ($A <= code && code <= $Z) - || ($0 <= code && code <= $9) - || (code === $_) - || (code === $$); -} - -function isDigit(code) { - return ($0 <= code && code <= $9); -} - -function isExponentStart(code) { - return (code === $e || code === $E); -} - -function 
isExponentSign(code) { - return (code === $MINUS || code === $PLUS); -} - -function unescape(code) { - switch (code) { - case $n: return $LF; - case $f: return $FF; - case $r: return $CR; - case $t: return $TAB; - case $v: return $VTAB; - default: return code; - } -} - -function assert(condition, message) { - if (!condition) { - throw message || 'Assertion failed'; - } -} diff --git a/src/parser.js b/src/parser.js index b1f05b09..fb012088 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,4 +1,3 @@ -import {Lexer, Token} from './lexer'; import { Chain, ValueConverter, Assign, Conditional, AccessThis, AccessScope, AccessMember, AccessKeyed, @@ -7,50 +6,51 @@ import { LiteralPrimitive, LiteralArray, LiteralObject, LiteralString } from './ast'; -let EOF = new Token(-1, null); - export class Parser { constructor() { - this.cache = {}; - this.lexer = new Lexer(); + this.cache = Object.create(null); } parse(input) { input = input || ''; return this.cache[input] - || (this.cache[input] = new ParserImplementation(this.lexer, input).parseChain()); + || (this.cache[input] = new ParserImplementation(input).parseChain()); } } export class ParserImplementation { - constructor(lexer, input) { + constructor(input) { this.index = 0; + this.startIndex = 0; + this.lastIndex = 0; this.input = input; - this.tokens = lexer.lex(input); - } - - get peek() { - return (this.index < this.tokens.length) ? 
this.tokens[this.index] : EOF; + this.length = input.length; + this.token = T_EndOfSource; + this.tokenValue = undefined; + this.tokenRaw = ''; + this.lastValue = 0; } parseChain() { + this.nextToken(); + let isChain = false; let expressions = []; - while (this.optional(';')) { - isChain = true; - } + while (this.token !== T_EndOfSource) { + while (this.optional(T_Semicolon)) { + isChain = true; + } - while (this.index < this.tokens.length) { - if (this.peek.text === ')' || this.peek.text === '}' || this.peek.text === ']') { - this.error(`Unconsumed token ${this.peek.text}`); + if ((this.token & T_ClosingToken) === T_ClosingToken) { + this.error(`Unconsumed token ${String.fromCharCode(this.tokenValue)}`); } - let expr = this.parseBindingBehavior(); + const expr = this.parseBindingBehavior(); expressions.push(expr); - while (this.optional(';')) { + while (this.optional(T_Semicolon)) { isChain = true; } @@ -65,13 +65,13 @@ export class ParserImplementation { parseBindingBehavior() { let result = this.parseValueConverter(); - while (this.optional('&')) { - let name = this.peek.text; + while (this.optional(T_BindingBehavior)) { + let name = this.tokenValue; let args = []; - this.advance(); + this.nextToken(); - while (this.optional(':')) { + while (this.optional(T_Colon)) { args.push(this.parseExpression()); } @@ -84,14 +84,14 @@ export class ParserImplementation { parseValueConverter() { let result = this.parseExpression(); - while (this.optional('|')) { - let name = this.peek.text; // TODO(kasperl): Restrict to identifier? + while (this.optional(T_ValueConverter)) { + let name = this.tokenValue; let args = []; - this.advance(); + this.nextToken(); - while (this.optional(':')) { - // TODO(kasperl): Is this really supposed to be expressions? + while (this.optional(T_Colon)) { + // todo(kasperl): Is this really supposed to be expressions? 
args.push(this.parseExpression()); } @@ -102,18 +102,18 @@ export class ParserImplementation { } parseExpression() { - let start = this.peek.index; + let start = this.index; let result = this.parseConditional(); - while (this.peek.text === '=') { + while (this.token === T_Assign) { if (!result.isAssignable) { - let end = (this.index < this.tokens.length) ? this.peek.index : this.input.length; - let expression = this.input.substring(start, end); + let end = (this.index < this.length) ? this.index : this.length; + let expression = this.input.slice(start, end); this.error(`Expression ${expression} is not assignable`); } - this.expect('='); + this.expect(T_Assign); result = new Assign(result, this.parseConditional()); } @@ -121,15 +121,15 @@ export class ParserImplementation { } parseConditional() { - let start = this.peek.index; - let result = this.parseLogicalOr(); + let start = this.index; + let result = this.parseBinary(0); - if (this.optional('?')) { + if (this.optional(T_QuestionMark)) { let yes = this.parseExpression(); - if (!this.optional(':')) { - let end = (this.index < this.tokens.length) ? this.peek.index : this.input.length; - let expression = this.input.substring(start, end); + if (!this.optional(T_Colon)) { + let end = (this.index < this.length) ? 
this.index : this.length; + let expression = this.input.slice(start, end); this.error(`Conditional expression ${expression} requires all 3 expressions`); } @@ -141,101 +141,38 @@ export class ParserImplementation { return result; } - parseLogicalOr() { - let result = this.parseLogicalAnd(); + parseBinary(minPrecedence) { + let left = this.parseUnary(); - while (this.optional('||')) { - result = new Binary('||', result, this.parseLogicalAnd()); + if ((this.token & T_IsBinaryOp) !== T_IsBinaryOp) { + return left; } - return result; - } - - parseLogicalAnd() { - let result = this.parseEquality(); - - while (this.optional('&&')) { - result = new Binary('&&', result, this.parseEquality()); - } - - return result; - } - - parseEquality() { - let result = this.parseRelational(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('==')) { - result = new Binary('==', result, this.parseRelational()); - } else if (this.optional('!=')) { - result = new Binary('!=', result, this.parseRelational()); - } else if (this.optional('===')) { - result = new Binary('===', result, this.parseRelational()); - } else if (this.optional('!==')) { - result = new Binary('!==', result, this.parseRelational()); - } else { - return result; - } - } - } - - parseRelational() { - let result = this.parseAdditive(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('<')) { - result = new Binary('<', result, this.parseAdditive()); - } else if (this.optional('>')) { - result = new Binary('>', result, this.parseAdditive()); - } else if (this.optional('<=')) { - result = new Binary('<=', result, this.parseAdditive()); - } else if (this.optional('>=')) { - result = new Binary('>=', result, this.parseAdditive()); - } else { - return result; + while ((this.token & T_IsBinaryOp) === T_IsBinaryOp) { + const opToken = this.token; + const precedence = opToken & T_Precedence; + if (precedence < minPrecedence) { + break; } + this.nextToken(); + left 
= new Binary(TokenValues[opToken & T_TokenMask], left, this.parseBinary(precedence)); } + return left; } - parseAdditive() { - let result = this.parseMultiplicative(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('+')) { - result = new Binary('+', result, this.parseMultiplicative()); - } else if (this.optional('-')) { - result = new Binary('-', result, this.parseMultiplicative()); - } else { - return result; + parseUnary() { + const opToken = this.token; + if ((opToken & T_IsUnaryOp) === T_IsUnaryOp) { + this.nextToken(); + switch(opToken) { + case T_Add: + return this.parseUnary(); + case T_Subtract: + return new Binary('-', new LiteralPrimitive(0), this.parseUnary()); + case T_LogicalNot: + return new PrefixNot('!', this.parseUnary()); } } - } - - parseMultiplicative() { - let result = this.parsePrefix(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('*')) { - result = new Binary('*', result, this.parsePrefix()); - } else if (this.optional('%')) { - result = new Binary('%', result, this.parsePrefix()); - } else if (this.optional('/')) { - result = new Binary('/', result, this.parsePrefix()); - } else { - return result; - } - } - } - - parsePrefix() { - if (this.optional('+')) { - return this.parsePrefix(); // TODO(kasperl): This is different than the original parser. - } else if (this.optional('-')) { - return new Binary('-', new LiteralPrimitive(0), this.parsePrefix()); - } else if (this.optional('!')) { - return new PrefixNot('!', this.parsePrefix()); - } - return this.parseAccessOrCallMember(); } @@ -243,14 +180,14 @@ export class ParserImplementation { let result = this.parsePrimary(); while (true) { // eslint-disable-line no-constant-condition - if (this.optional('.')) { - let name = this.peek.text; // TODO(kasperl): Check that this is an identifier. Are keywords okay? + if (this.optional(T_Period)) { + let name = this.tokenValue; // todo(kasperl): Check that this is an identifier. 
Are keywords okay? - this.advance(); + this.nextToken(); - if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); if (result instanceof AccessThis) { result = new CallScope(name, args, result.ancestor); } else { @@ -263,13 +200,13 @@ export class ParserImplementation { result = new AccessMember(result, name); } } - } else if (this.optional('[')) { + } else if (this.optional(T_LeftBracket)) { let key = this.parseExpression(); - this.expect(']'); + this.expect(T_RightBracket); result = new AccessKeyed(result, key); - } else if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + } else if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); result = new CallFunction(result, args); } else { return result; @@ -278,70 +215,78 @@ export class ParserImplementation { } parsePrimary() { - if (this.optional('(')) { - let result = this.parseExpression(); - this.expect(')'); - return result; - } else if (this.optional('null')) { - return new LiteralPrimitive(null); - } else if (this.optional('undefined')) { - return new LiteralPrimitive(undefined); - } else if (this.optional('true')) { - return new LiteralPrimitive(true); - } else if (this.optional('false')) { - return new LiteralPrimitive(false); - } else if (this.optional('[')) { - let elements = this.parseExpressionList(']'); - this.expect(']'); - return new LiteralArray(elements); - } else if (this.peek.text === '{') { - return this.parseObject(); - } else if (this.peek.key !== null && this.peek.key !== undefined) { - return this.parseAccessOrCallScope(); - } else if (this.peek.value !== null && this.peek.value !== undefined) { - let value = this.peek.value; - this.advance(); - return value instanceof String || typeof value === 'string' ? 
new LiteralString(value) : new LiteralPrimitive(value); - } else if (this.index >= this.tokens.length) { - throw new Error(`Unexpected end of expression: ${this.input}`); - } else { - this.error(`Unexpected token ${this.peek.text}`); + const token = this.token; + switch (token) { + case T_Identifier: + case T_ParentScope: + return this.parseAccessOrCallScope(); + case T_ThisScope: + this.nextToken(); + return new AccessThis(0); + case T_LeftParen: + this.nextToken(); + const result = this.parseExpression(); + this.expect(T_RightParen); + return result; + case T_LeftBracket: + this.nextToken(); + const elements = this.parseExpressionList(T_RightBracket); + this.expect(T_RightBracket); + return new LiteralArray(elements); + case T_LeftBrace: + return this.parseObject(); + case T_StringLiteral: + { + const value = this.tokenValue; + this.nextToken(); + return new LiteralString(value); + } + case T_NumericLiteral: + { + const value = this.tokenValue; + this.nextToken(); + return new LiteralPrimitive(value); + } + case T_NullKeyword: + case T_UndefinedKeyword: + case T_TrueKeyword: + case T_FalseKeyword: + this.nextToken(); + return new LiteralPrimitive(TokenValues[token & T_TokenMask]); + default: + if (this.index >= this.length) { + throw new Error(`Unexpected end of expression at column ${this.index} of ${this.input}`); + } else { + const expression = this.input.slice(this.lastIndex, this.index); + this.error(`Unexpected token ${expression}`); + } } } parseAccessOrCallScope() { - let name = this.peek.key; - - this.advance(); + let name = this.tokenValue; + let token = this.token; - if (name === '$this') { - return new AccessThis(0); - } + this.nextToken(); let ancestor = 0; - while (name === '$parent') { + while (token === T_ParentScope) { ancestor++; - if (this.optional('.')) { - name = this.peek.key; - this.advance(); - } else if (this.peek === EOF - || this.peek.text === '(' - || this.peek.text === ')' - || this.peek.text === '[' - || this.peek.text === '}' - || 
this.peek.text === ',' - || this.peek.text === '|' - || this.peek.text === '&' - ) { + if (this.optional(T_Period)) { + name = this.tokenValue; + token = this.token; + this.nextToken(); + } else if ((this.token & T_AccessScopeTerminal) === T_AccessScopeTerminal) { return new AccessThis(ancestor); } else { - this.error(`Unexpected token ${this.peek.text}`); + const expression = this.input.slice(this.lastIndex, this.index); + this.error(`Unexpected token ${expression}`); } } - if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); return new CallScope(name, args, ancestor); } @@ -352,28 +297,28 @@ export class ParserImplementation { let keys = []; let values = []; - this.expect('{'); + this.expect(T_LeftBrace); - if (this.peek.text !== '}') { + if (this.token ^ T_RightBrace) { do { - // TODO(kasperl): Stricter checking. Only allow identifiers + // todo(kasperl): Stricter checking. Only allow identifiers // and strings as keys. Maybe also keywords? - let peek = this.peek; - let value = peek.value; - keys.push(typeof value === 'string' ? 
value : peek.text); - this.advance(); - if (peek.key && (this.peek.text === ',' || this.peek.text === '}')) { + let token = this.token; + keys.push(this.tokenValue); + + this.nextToken(); + if (token === T_Identifier && (this.token === T_Comma || this.token === T_RightBrace)) { --this.index; values.push(this.parseAccessOrCallScope()); } else { - this.expect(':'); + this.expect(T_Colon); values.push(this.parseExpression()); } - } while (this.optional(',')); + } while (this.optional(T_Comma)); } - this.expect('}'); + this.expect(T_RightBrace); return new LiteralObject(keys, values); } @@ -381,41 +326,500 @@ export class ParserImplementation { parseExpressionList(terminator) { let result = []; - if (this.peek.text !== terminator) { + if (this.token ^ terminator) { do { result.push(this.parseExpression()); - } while (this.optional(',')); + } while (this.optional(T_Comma)); } return result; } - optional(text) { - if (this.peek.text === text) { - this.advance(); + nextToken() { + this.lastIndex = this.index; + + return this.token = this.scanToken(); + } + + scanToken() { + while (this.index < this.length) { + this.startIndex = this.index; + let current = this.input.charCodeAt(this.index); + // skip whitespace. + if (current <= $SPACE) { + this.index++; + continue; + } + + // handle identifiers and numbers. 
+ if (isIdentifierStart(current)) { + return this.scanIdentifier(); + } + + if (isDigit(current)) { + return this.scanNumber(false); + } + + let start = this.index; + + switch (current) { + case $PERIOD: + { + if (this.index < this.length) { + const next = this.input.charCodeAt(this.index + 1); + if (next >= $0 && next <= $9) { + return this.scanNumber(true); + } + this.index++; + } + return T_Period; + } + case $LPAREN: + this.index++; + return T_LeftParen; + case $RPAREN: + this.index++; + return T_RightParen; + case $LBRACE: + this.index++; + return T_LeftBrace; + case $RBRACE: + this.index++; + return T_RightBrace; + case $LBRACKET: + this.index++; + return T_LeftBracket; + case $RBRACKET: + this.index++; + return T_RightBracket; + case $COMMA: + this.index++; + return T_Comma; + case $COLON: + this.index++; + return T_Colon; + case $SEMICOLON: + this.index++; + return T_Semicolon; + case $SQ: + case $DQ: + return this.scanString(); + case $PLUS: + this.index++; + return T_Add; + case $MINUS: + this.index++; + return T_Subtract; + case $STAR: + this.index++; + return T_Multiply; + case $SLASH: + this.index++; + return T_Divide; + case $PERCENT: + this.index++; + return T_Modulo; + case $CARET: + this.index++; + return T_BitwiseXor; + case $QUESTION: + this.index++; + return T_QuestionMark; + case $LT: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_LessThanOrEqual; + } + return T_LessThan; + } + case $GT: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_GreaterThanOrEqual; + } + return T_GreaterThan; + } + case $BANG: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_StrictNotEqual; + } + return T_LooseNotEqual; + } + return T_LogicalNot; + } + case $EQ: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + 
let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_StrictEqual; + } + return T_LooseEqual; + } + return T_Assign; + } + case $AMPERSAND: + { + let next = this.input.charCodeAt(++this.index); + if (next === $AMPERSAND) { + this.index++; + return T_LogicalAnd; + } + return T_BindingBehavior; + } + case $BAR: + { + let next = this.input.charCodeAt(++this.index); + if (next === $BAR) { + this.index++; + return T_LogicalOr; + } + return T_ValueConverter; + } + case $NBSP: + this.index++; + continue; + // no default + } + + let character = String.fromCharCode(this.input.charCodeAt(this.index)); + this.error(`Unexpected character [${character}]`); + return null; + } + + return T_EndOfSource; + } + + scanIdentifier() { + const start = this.index; + let char = this.input.charCodeAt(++this.index); + + while (isIdentifierPart(char)) { + char = this.input.charCodeAt(++this.index); + } + + let text = this.input.slice(start, this.index); + this.tokenValue = text; + + let len = text.length; + if (len >= 4 && len <= 9) { + const token = KeywordLookup[text]; + if (token !== undefined) { + return token; + } + } + + return T_Identifier; + } + + scanNumber(isFloat) { + let start = this.index; + this.index++; + let char = this.input.charCodeAt(this.index); + loop: while (true) { + switch(char) { + case $PERIOD: + // todo(fkleuver): Should deal with spread operator elsewhere, + // and throw here when seeing more than one period + isFloat = true; + break; + case $e: + case $E: + char = this.input.charCodeAt(++this.index); + if (char === $PLUS || char === $MINUS) { + char = this.input.charCodeAt(++this.index); + } + if (char < $0 || char > $9) { + this.error('Invalid exponent', -1); + } + isFloat = true; + break; + default: + if (char < $0 || char > $9 || this.index === this.length) { + break loop; + } + } + char = this.input.charCodeAt(++this.index); + } + + const text = this.input.slice(start, this.index); + this.tokenValue = isFloat ? 
parseFloat(text) : parseInt(text, 10); + return T_NumericLiteral; + } + + scanString() { + let start = this.index; + let quote = this.input.charCodeAt(this.index++); // Skip initial quote. + + let buffer; + let marker = this.index; + let char = this.input.charCodeAt(this.index); + + while (char !== quote) { + if (char === $BACKSLASH) { + if (!buffer) { + buffer = []; + } + + buffer.push(this.input.slice(marker, this.index)); + char = this.input.charCodeAt(++this.index) + + let unescaped; + + if (char === $u) { + // todo(kasperl): Check bounds? Make sure we have test + // coverage for this. + let hex = this.input.slice(this.index + 1, this.index + 5); + + if (!/[A-Z0-9]{4}/.test(hex)) { + this.error(`Invalid unicode escape [\\u${hex}]`); + } + + unescaped = parseInt(hex, 16); + this.index += 5; + } else { + unescaped = unescape(this.input.charCodeAt(this.index)); + this.index++; + } + + buffer.push(String.fromCharCode(unescaped)); + marker = this.index; + } else if (char === $EOF) { + this.error('Unterminated quote'); + } else { + this.index++; + } + + char = this.input.charCodeAt(this.index) + } + + let last = this.input.slice(marker, this.index); + this.index++; // Skip terminating quote. + let text = this.input.slice(start, this.index); + + // Compute the unescaped string value. + let unescaped = last; + + if (buffer !== null && buffer !== undefined) { + buffer.push(last); + unescaped = buffer.join(''); + } + + this.tokenValue = unescaped; + this.tokenRaw = text; + return T_StringLiteral; + } + + error(message, offset = 0) { + // todo(kasperl): Try to get rid of the offset. It is only used to match + // the error expectations in the lexer tests for numbers with exponents. 
+ let position = this.index + offset; + throw new Error(`Lexer Error: ${message} at column ${position} in expression [${this.input}]`); + } + + optional(type) { + if (this.token === type) { + this.nextToken(); return true; } return false; } - expect(text) { - if (this.peek.text === text) { - this.advance(); + expect(type) { + if (this.token === type) { + this.nextToken(); } else { - this.error(`Missing expected ${text}`); + this.error(`Missing expected token type ${type}`); } } +} - advance() { - this.index++; - } +const $EOF = 0; +const $TAB = 9; +const $LF = 10; +const $VTAB = 11; +const $FF = 12; +const $CR = 13; +const $SPACE = 32; +const $BANG = 33; +const $DQ = 34; +const $$ = 36; +const $PERCENT = 37; +const $AMPERSAND = 38; +const $SQ = 39; +const $LPAREN = 40; +const $RPAREN = 41; +const $STAR = 42; +const $PLUS = 43; +const $COMMA = 44; +const $MINUS = 45; +const $PERIOD = 46; +const $SLASH = 47; +const $COLON = 58; +const $SEMICOLON = 59; +const $LT = 60; +const $EQ = 61; +const $GT = 62; +const $QUESTION = 63; + +const $0 = 48; +const $9 = 57; + +const $A = 65; +const $E = 69; +const $Z = 90; + +const $LBRACKET = 91; +const $BACKSLASH = 92; +const $RBRACKET = 93; +const $CARET = 94; +const $_ = 95; + +const $a = 97; +const $e = 101; +const $f = 102; +const $n = 110; +const $r = 114; +const $t = 116; +const $u = 117; +const $v = 118; +const $z = 122; + +const $LBRACE = 123; +const $BAR = 124; +const $RBRACE = 125; +const $NBSP = 160; + +function isIdentifierStart(code) { + return ($a <= code && code <= $z) + || ($A <= code && code <= $Z) + || (code === $_) + || (code === $$); +} - error(message) { - let location = (this.index < this.tokens.length) - ? 
`at column ${this.tokens[this.index].index + 1} in` - : 'at the end of the expression'; +function isIdentifierPart(code) { + return ($a <= code && code <= $z) + || ($A <= code && code <= $Z) + || ($0 <= code && code <= $9) + || (code === $_) + || (code === $$); +} + +function isDigit(code) { + return ($0 <= code && code <= $9); +} - throw new Error(`Parser Error: ${message} ${location} [${this.input}]`); +function unescape(code) { + switch (code) { + case $n: return $LF; + case $f: return $FF; + case $r: return $CR; + case $t: return $TAB; + case $v: return $VTAB; + default: return code; } } + +/* Performing a bitwise and (&) with this value (63) will return only the + * token bit, which corresponds to the index of the token's value in the + * TokenValues array */ +const T_TokenMask = (1 << 6) - 1; + +/* Shifting 6 bits to the left gives us a step size of 64 in a range of + * 64 (1 << 6) to 448 (7 << 6) for our precedence bit + * This is the lowest value which does not overlap with the token bits 0-38. */ +const T_PrecedenceShift = 6; + +/* Performing a bitwise and (&) with this value will return only the + * precedence bit, which is used to determine the parsing order of bitwise + * expressions */ +const T_Precedence = 7 << 6; + +/** ')' | '}' | ']' */ +const T_ClosingToken = 1 << 9; +/** EndOfSource | '(' | '}' | ')' | ',' | '[' | '&' | '|' */ +const T_AccessScopeTerminal = 1 << 10; +const T_EndOfSource = 1 << 11 | T_AccessScopeTerminal; +const T_Identifier = 1 << 12; +const T_NumericLiteral = 1 << 13; +const T_StringLiteral = 1 << 14; +const T_IsBinaryOp = 1 << 15; +const T_IsUnaryOp = 1 << 16; + +/** false */ const T_FalseKeyword = 0; +/** true */ const T_TrueKeyword = 1; +/** null */ const T_NullKeyword = 2; +/** undefined */ const T_UndefinedKeyword = 3; +/** '$this' */ const T_ThisScope = 4; +/** '$parent' */ const T_ParentScope = 5; + +/** '(' */const T_LeftParen = 6 | T_AccessScopeTerminal; +/** '{' */const T_LeftBrace = 7; +/** '.' 
*/const T_Period = 8; +/** '}' */const T_RightBrace = 9 | T_ClosingToken | T_AccessScopeTerminal; +/** ')' */const T_RightParen = 10 | T_ClosingToken | T_AccessScopeTerminal; +/** ';' */const T_Semicolon = 11; +/** ',' */const T_Comma = 12 | T_AccessScopeTerminal; +/** '[' */const T_LeftBracket = 13 | T_AccessScopeTerminal; +/** ']' */const T_RightBracket = 14 | T_ClosingToken; +/** ':' */const T_Colon = 15; +/** '?' */const T_QuestionMark = 16; +/** ''' */const T_SingleQuote = 17; +/** '"' */const T_DoubleQuote = 18; + +/** '&' */ const T_BindingBehavior = 19 | T_AccessScopeTerminal; +/** '|' */ const T_ValueConverter = 20 | T_AccessScopeTerminal; +/** '||' */ const T_LogicalOr = 21 | T_IsBinaryOp | 1 << T_PrecedenceShift; +/** '&&' */ const T_LogicalAnd = 22 | T_IsBinaryOp | 2 << T_PrecedenceShift; +/** '^' */ const T_BitwiseXor = 23 | T_IsBinaryOp | 3 << T_PrecedenceShift; +/** '==' */ const T_LooseEqual = 24 | T_IsBinaryOp | 4 << T_PrecedenceShift; +/** '!=' */ const T_LooseNotEqual = 25 | T_IsBinaryOp | 4 << T_PrecedenceShift; +/** '===' */const T_StrictEqual = 26 | T_IsBinaryOp | 4 << T_PrecedenceShift; +/** '!==' */const T_StrictNotEqual = 27 | T_IsBinaryOp | 4 << T_PrecedenceShift; +/** '<' */ const T_LessThan = 28 | T_IsBinaryOp | 5 << T_PrecedenceShift; +/** '>' */ const T_GreaterThan = 29 | T_IsBinaryOp | 5 << T_PrecedenceShift; +/** '<=' */ const T_LessThanOrEqual = 30 | T_IsBinaryOp | 5 << T_PrecedenceShift; +/** '>=' */ const T_GreaterThanOrEqual = 31 | T_IsBinaryOp | 5 << T_PrecedenceShift; +/** '+' */ const T_Add = 32 | T_IsUnaryOp | T_IsBinaryOp | 6 << T_PrecedenceShift; +/** '-' */ const T_Subtract = 33 | T_IsUnaryOp | T_IsBinaryOp | 6 << T_PrecedenceShift; +/** '*' */ const T_Multiply = 34 | T_IsBinaryOp | 7 << T_PrecedenceShift; +/** '%' */ const T_Modulo = 35 | T_IsBinaryOp | 7 << T_PrecedenceShift; +/** '/' */ const T_Divide = 36 | T_IsBinaryOp | 7 << T_PrecedenceShift; +/** '=' */ const T_Assign = 37; +/** '!' 
*/ const T_LogicalNot = 38 | T_IsUnaryOp; + +const KeywordLookup = Object.create(null, { + true: {value: T_TrueKeyword}, + null: {value: T_NullKeyword}, + false: {value: T_FalseKeyword}, + undefined: {value: T_UndefinedKeyword}, + $this: {value: T_ThisScope}, + $parent: {value: T_ParentScope} +}); + +/** + * Array for mapping tokens to token values. The indices of the values + * correspond to the token bits 0-38. + * For this to work properly, the values in the array must be kept in + * the same order as the token bits. + */ +const TokenValues = [ + false, true, null, undefined, '$this', '$parent', + + '(', '{', '.', '}', ')', ';', ',', '[', ']', ':', '?', '\'', '"', + + '&', '|', '||', '&&', '^', '==', '!=', '===', '!==', '<', '>', + '<=', '>=', '+', '-', '*', '%', '/', '=', '!' +]; diff --git a/test/parser.spec.js b/test/parser.spec.js index 7b7d6b1a..3a8cf59a 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -13,7 +13,9 @@ import { CallFunction, AccessThis, AccessAncestor, - Assign + Assign, + Conditional, + Binary } from '../src/ast'; describe('Parser', () => { @@ -52,6 +54,55 @@ describe('Parser', () => { } }); + it('parses conditional', () => { + let expression = parser.parse('foo ? bar : baz'); + expect(expression instanceof Conditional).toBe(true); + expect(expression.condition instanceof AccessScope).toBe(true); + expect(expression.condition.name).toBe('foo'); + expect(expression.yes instanceof AccessScope).toBe(true); + expect(expression.yes.name).toBe('bar'); + expect(expression.no instanceof AccessScope).toBe(true); + expect(expression.no.name).toBe('baz'); + }); + + it('parses nested conditional', () => { + let expression = parser.parse('foo ? bar : foo1 ? 
bar1 : baz'); + expect(expression instanceof Conditional).toBe(true); + expect(expression.condition instanceof AccessScope).toBe(true); + expect(expression.condition.name).toBe('foo'); + expect(expression.yes instanceof AccessScope).toBe(true); + expect(expression.yes.name).toBe('bar'); + expect(expression.no instanceof Conditional).toBe(true); + expect(expression.no.condition instanceof AccessScope).toBe(true); + expect(expression.no.condition.name).toBe('foo1'); + expect(expression.no.yes instanceof AccessScope).toBe(true); + expect(expression.no.yes.name).toBe('bar1'); + expect(expression.no.no instanceof AccessScope).toBe(true); + expect(expression.no.no.name).toBe('baz'); + }); + + describe('parses binary', () => { + const operators = [ + '&&', '||', + '==', '!=', '===', '!==', + '<', '>', '<=', '>=', + '+', '-', + '*', '%', '/' + ]; + + for (let op of operators) { + it(`\"${op}\"`, () => { + let expression = parser.parse(`foo ${op} bar`); + expect(expression instanceof Binary).toBe(true); + expect(expression.operation).toBe(op); + expect(expression.left instanceof AccessScope).toBe(true); + expect(expression.left.name).toBe('foo'); + expect(expression.right instanceof AccessScope).toBe(true); + expect(expression.right.name).toBe('bar'); + }); + } + }); + it('parses binding behaviors', () => { let expression = parser.parse('foo & bar'); expect(expression instanceof BindingBehavior).toBe(true);