Reads and tokenizes an input string given a set of string or regex patterns.
With this module you are able to parse a string like "int a = 5" into this:
Token { name: 'int', lexeme: 'int', position: 0 }
Token { name: 'id', lexeme: 'a', position: 4 }
Token { name: '=', lexeme: '=', position: 6 }
Token { name: 'integer', lexeme: '5', position: 8 }
Install this module in your project by running:
$ npm install --save lexical-parser
'use strict';
const Lex = require('lexical-parser')
let input = 'block { int a = 0 int b = 3 if (a < b && b > a) { print("Hello World") } }'
// You can specify an exact string or a regex for the token
let tokenMatchers = [
'block',
'if',
'{',
'}',
'(',
')',
'<',
'>',
'=',
'&&',
'int',
['integer', /[0-9]+/],
['id', /[a-zA-Z][a-zA-Z0-9]*/],
['string', /\".*?\"/]
]
// The pattern to ignore in the input
let ignorePattern = '[\n\s \t]+'
let lex = new Lex(input, tokenMatchers, ignorePattern)
let token = undefined
try {
do {
token = lex.nextToken()
console.log(token)
} while (token)
} catch (err) {
// Error handling
if (err.code === "LEXICAL_ERROR") {
console.log(`\n${err.message}\n`)
console.log(`Position: ${err.position}`)
console.log(`Character: ${err.character}`)
console.log(`Nearby code: ${err.nearbyCode}`)
}
else
console.log(err)
}
Token { name: 'block', lexeme: 'block', position: 0 }
Token { name: '{', lexeme: '{', position: 6 }
Token { name: 'int', lexeme: 'int', position: 8 }
Token { name: 'id', lexeme: 'a', position: 12 }
Token { name: '=', lexeme: '=', position: 14 }
Token { name: 'integer', lexeme: '0', position: 16 }
Token { name: 'int', lexeme: 'int', position: 18 }
Token { name: 'id', lexeme: 'b', position: 22 }
Token { name: '=', lexeme: '=', position: 24 }
Token { name: 'integer', lexeme: '3', position: 26 }
Token { name: 'if', lexeme: 'if', position: 28 }
Token { name: '(', lexeme: '(', position: 31 }
Token { name: 'id', lexeme: 'a', position: 32 }
Token { name: '<', lexeme: '<', position: 34 }
Token { name: 'id', lexeme: 'b', position: 36 }
Token { name: '&&', lexeme: '&&', position: 38 }
Token { name: 'id', lexeme: 'b', position: 41 }
Token { name: '>', lexeme: '>', position: 43 }
Token { name: 'id', lexeme: 'a', position: 45 }
Token { name: ')', lexeme: ')', position: 46 }
Token { name: '{', lexeme: '{', position: 48 }
Token { name: 'id', lexeme: 'print', position: 50 }
Token { name: '(', lexeme: '(', position: 55 }
Token { name: 'string', lexeme: '"Hello World"', position: 56 }
Token { name: ')', lexeme: ')', position: 69 }
Token { name: '}', lexeme: '}', position: 71 }
Token { name: '}', lexeme: '}', position: 73 }
undefined