diff --git a/.travis.yml b/.travis.yml index 6b996da3c..9d0f2d558 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,8 +15,8 @@ deploy: api_key: secure: DNq1wbqLPHVpJPDx9O89HZM+RJB6v2R7/wk8pok7Z8NT72kUWdvbqcThGhczPO4sZ8cUTJ3ergTCE8hs9mynlR/lX6932U4fj4+uICQL9+G+deBB/t2SNyTBllkE64WrJ9BKmQvIk/Chh7ZJOM0Fro3p2BIq3JsVnfYg1tZ3U5o= file: - - package/chevrotain-binaries-0.5.7.zip - - package/chevrotain-binaries-0.5.7.tar.gz + - package/chevrotain-binaries-0.5.8.zip + - package/chevrotain-binaries-0.5.8.tar.gz on: tags : true all_branches: true diff --git a/bower.json b/bower.json index b2847060e..8506f12d5 100644 --- a/bower.json +++ b/bower.json @@ -1,6 +1,6 @@ { "name": "chevrotain", - "version": "0.5.7", + "version": "0.5.8", "description": "Chevrotain is a high performance fault Tolerant Javascript parsing DSL for building recursive decent parsers", "main": "release/chevrotain.js", "dependencies": { diff --git a/package.json b/package.json index 9a1387f7e..0c193c1c1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "chevrotain", - "version": "0.5.7", + "version": "0.5.8", "description": "Chevrotain is a high performance fault Tolerant Javascript parsing DSL for building recursive decent parsers", "keywords": [ "parser", diff --git a/release/chevrotain.d.ts b/release/chevrotain.d.ts index 505cff927..647214c7c 100644 --- a/release/chevrotain.d.ts +++ b/release/chevrotain.d.ts @@ -1,4 +1,4 @@ -/*! chevrotain - v0.5.7 - 2015-11-30 */ +/*! chevrotain - v0.5.8 - 2015-12-08 */ declare module chevrotain { module lang { class HashTable{} @@ -218,6 +218,45 @@ declare module chevrotain { } type LookAheadFunc = () => boolean; type GrammarAction = () => void; + /** + * convenience used to express an empty alternative in an OR (alternation). + * can be used to more clearly describe the intent in a case of empty alternation. + * + * for example: + * + * 1. without using EMPTY_ALT: + * + * this.OR([ + * {ALT: () => { + * this.CONSUME1(OneTok) + * return "1" + * }}, + * {ALT: () => { + * this.CONSUME1(TwoTok) + * return "2" + * }}, + * {ALT: () => { // implicitly empty because there are no invoked grammar rules (OR/MANY/CONSUME...) inside this alternative. + * return "666" + * }}, + * ]) + * + * + * * 2. using EMPTY_ALT: + * + * this.OR([ + * {ALT: () => { + * this.CONSUME1(OneTok) + * return "1" + * }}, + * {ALT: () => { + * this.CONSUME1(TwoTok) + * return "2" + * }}, + * {ALT: EMPTY_ALT("666")}, // explicitly empty, clearer intent + * ]) + * + */ + let EMPTY_ALT: (value: T) => () => T; /** * A Recognizer capable of self analysis to determine it's grammar structure * This is used for more advanced features requiring such information. @@ -431,7 +470,7 @@ declare module chevrotain { * * using the short form is recommended as it will compute the lookahead function * automatically. however this currently has one limitation: - * It only works if the lookahead for the grammar is one. + * It only works if the lookahead for the grammar is one LL(1). * * As in CONSUME the index in the method name indicates the occurrence * of the alternation production in it's top rule. diff --git a/release/chevrotain.js b/release/chevrotain.js index a1c2f682c..c86bbb501 100644 --- a/release/chevrotain.js +++ b/release/chevrotain.js @@ -15,7 +15,7 @@ } }(this, function (_) { -/*! chevrotain - v0.5.7 - 2015-11-30 */ +/*! chevrotain - v0.5.8 - 2015-12-08 */ var chevrotain; (function (chevrotain) { var lang; @@ -1662,21 +1662,46 @@ var chevrotain; " matches and ignore all the others"); } } - /** - * This will return the Index of the alternative to take or -1 if none of the alternatives match - */ - return function () { - var nextToken = this.NEXT_TOKEN(); - for (var i = 0; i < alternativesTokens.length; i++) { - var currAltTokens = alternativesTokens[i]; - for (var j = 0; j < currAltTokens.length; j++) { - if (nextToken instanceof currAltTokens[j]) { - return i; + var hasLastAnEmptyAlt = _.isEmpty(_.last(alternativesTokens)); + if (hasLastAnEmptyAlt) { + var lastIdx = alternativesTokens.length - 1; + /** + * This will return the Index of the alternative to take or the if only the empty alternative matched + */ + return function chooseAlternativeWithEmptyAlt() { + var nextToken = this.NEXT_TOKEN(); + // checking only until length - 1 because there is nothing to check in an empty alternative, it is always valid + for (var i = 0; i < lastIdx; i++) { + var currAltTokens = alternativesTokens[i]; + // 'for' loop for performance reasons. + for (var j = 0; j < currAltTokens.length; j++) { + if (nextToken instanceof currAltTokens[j]) { + return i; + } } } - } - return -1; - }; + // an OR(alternation) with an empty alternative will always match + return lastIdx; + }; + } + else { + /** + * This will return the Index of the alternative to take or -1 if none of the alternatives match + */ + return function chooseAlternative() { + var nextToken = this.NEXT_TOKEN(); + for (var i = 0; i < alternativesTokens.length; i++) { + var currAltTokens = alternativesTokens[i]; + // 'for' loop for performance reasons. + for (var j = 0; j < currAltTokens.length; j++) { + if (nextToken instanceof currAltTokens[j]) { + return i; + } + } + } + return -1; + }; + } } lookahead.buildLookaheadForOr = buildLookaheadForOr; function checkAlternativesAmbiguities(alternativesTokens) { @@ -1768,9 +1793,10 @@ var chevrotain; // the top most range must strictly contain all the other ranges // which is why we prefix the text with " " (curr Range impel is only for positive ranges) var spacedImpelText = " " + impelText; + // TODO: why do we add whitespace twice? var txtWithoutComments = removeComments(" " + spacedImpelText); - // TODO: consider removing literal strings too to avoid future errors (literal string with ')' for example) - var prodRanges = createRanges(txtWithoutComments); + var textWithoutCommentsAndStrings = removeStringLiterals(txtWithoutComments); + var prodRanges = createRanges(textWithoutCommentsAndStrings); var topRange = new r.Range(0, impelText.length + 2); return buildTopLevel(name, topRange, prodRanges, impelText); } @@ -1891,12 +1917,20 @@ var chevrotain; gastBuilder.getDirectlyContainedRanges = getDirectlyContainedRanges; var singleLineCommentRegEx = /\/\/.*/g; var multiLineCommentRegEx = /\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\//g; + var doubleQuoteStringLiteralRegEx = /"([^\\"]+|\\([bfnrtv"\\/]|u[0-9a-fA-F]{4}))*"/g; + var singleQuoteStringLiteralRegEx = /'([^\\']+|\\([bfnrtv'\\/]|u[0-9a-fA-F]{4}))*'/g; function removeComments(text) { var noSingleLine = text.replace(singleLineCommentRegEx, ""); var noComments = noSingleLine.replace(multiLineCommentRegEx, ""); return noComments; } gastBuilder.removeComments = removeComments; + function removeStringLiterals(text) { + var noDoubleQuotes = text.replace(doubleQuoteStringLiteralRegEx, ""); + var noSingleQuotes = noDoubleQuotes.replace(singleQuoteStringLiteralRegEx, ""); + return noSingleQuotes; + } + gastBuilder.removeStringLiterals = removeStringLiterals; function createRanges(text) { var terminalRanges = createTerminalRanges(text); var refsRanges = createRefsRanges(text); @@ -2331,6 +2365,49 @@ var chevrotain; ParserDefinitionErrorType[ParserDefinitionErrorType["LEFT_RECURSION"] = 4] = "LEFT_RECURSION"; })(chevrotain.ParserDefinitionErrorType || (chevrotain.ParserDefinitionErrorType = {})); var ParserDefinitionErrorType = chevrotain.ParserDefinitionErrorType; + /** + * convenience used to express an empty alternative in an OR (alternation). + * can be used to more clearly describe the intent in a case of empty alternation. + * + * for example: + * + * 1. without using EMPTY_ALT: + * + * this.OR([ + * {ALT: () => { + * this.CONSUME1(OneTok) + * return "1" + * }}, + * {ALT: () => { + * this.CONSUME1(TwoTok) + * return "2" + * }}, + * {ALT: () => { // implicitly empty because there are no invoked grammar rules (OR/MANY/CONSUME...) inside this alternative. + * return "666" + * }}, + * ]) + * + * + * * 2. using EMPTY_ALT: + * + * this.OR([ + * {ALT: () => { + * this.CONSUME1(OneTok) + * return "1" + * }}, + * {ALT: () => { + * this.CONSUME1(TwoTok) + * return "2" + * }}, + * {ALT: EMPTY_ALT("666")}, // explicitly empty, clearer intent + * ]) + * + */ + chevrotain.EMPTY_ALT = function emptyAlt(value) { + return function () { + return value; + }; + }; var EOF_FOLLOW_KEY = {}; /** * A Recognizer capable of self analysis to determine it's grammar structure @@ -2741,7 +2818,7 @@ var chevrotain; * * using the short form is recommended as it will compute the lookahead function * automatically. however this currently has one limitation: - * It only works if the lookahead for the grammar is one. + * It only works if the lookahead for the grammar is one LL(1). * * As in CONSUME the index in the method name indicates the occurrence * of the alternation production in it's top rule. @@ -3599,7 +3676,7 @@ var API = {}; /* istanbul ignore next */ if (!testMode) { // semantic version - API.VERSION = "0.5.7"; + API.VERSION = "0.5.8"; // runtime API API.Parser = chevrotain.Parser; API.Lexer = chevrotain.Lexer; @@ -3609,6 +3686,8 @@ if (!testMode) { // Tokens utilities API.extendToken = chevrotain.extendToken; API.tokenName = chevrotain.tokenName; + // Other Utilities + API.EMPTY_ALT = chevrotain.EMPTY_ALT; API.exceptions = {}; API.exceptions.isRecognitionException = chevrotain.exceptions.isRecognitionException; API.exceptions.EarlyExitException = chevrotain.exceptions.EarlyExitException; diff --git a/src/api.ts b/src/api.ts index dec2c78a1..958cf4375 100644 --- a/src/api.ts +++ b/src/api.ts @@ -12,7 +12,7 @@ let API:any = {} /* istanbul ignore next */ if (!testMode) { // semantic version - API.VERSION = "0.5.7"; + API.VERSION = "0.5.8"; // runtime API API.Parser = chevrotain.Parser