diff --git a/inputters/sil-epnf.lua b/inputters/sil-epnf.lua
index 8be206a44..4cc90196e 100644
--- a/inputters/sil-epnf.lua
+++ b/inputters/sil-epnf.lua
@@ -1,4 +1,5 @@
-local bits = SILE.parserBits
+local epnf = require("epnf")
+local bits = require("core.parserbits")
 
 local passthroughCommands = {
   ftl = true,
@@ -25,7 +26,7 @@ end
 
 -- luacheck: push ignore
 ---@diagnostic disable: undefined-global, unused-local, lowercase-global
-local function grammar (_ENV)
+local function builder (_ENV)
   local _ = WS^0
   local eol = S"\r\n"
   local specials = S"\\{}%"
@@ -92,4 +93,10 @@ local function grammar (_ENV)
   )
 end
 
-return grammar
+local grammar = epnf.define(builder)
+
+local function parser (string)
+  return epnf.parsestring(grammar, string)
+end
+
+return parser
diff --git a/inputters/sil-lpeg-re.lua b/inputters/sil-lpeg-re.lua
new file mode 100644
index 000000000..6a3176024
--- /dev/null
+++ b/inputters/sil-lpeg-re.lua
@@ -0,0 +1,71 @@
+local lpeg = require("lpeg")
+local re = require("re")
+local bits = require("core.parserbits")
+
+local P, C, S = lpeg.P, lpeg.C, lpeg.S
+local myID = C(bits.silidentifier) / 1
+
+local wrapper = function (a) return type(a)=="table" and a or {} end
+local specials = S"{}%\\"
+
+local expression = [=[
+
+document <- texlike_stuff !.
+
+texlike_stuff <- {: environment / comment / texlike_text / texlike_braced_stuff / texlike_command :}*
+
+environment <- '\begin' {:options: %parameters :}
+  ('{' {:command: passthrough_cmd :} '}' passthrough_env_stuff pass_end /
+  '{' {:command: %cmdID :} '}' texlike_stuff notpass_end)
+
+comment <- ('%' (!%eol .)* %eol ) -> ''
+
+texlike_text <- { (!%specials . / %escaped_specials)+ } -> unescapeSpecials
+
+texlike_braced_stuff <- '{' texlike_stuff '}'
+
+texlike_command <- '\' ({:command: passthrough_cmd :} {:options: %parameters :}
+  passthrough_braced_stuff / {:command: %cmdID :} {:options: %parameters :}
+  texlike_braced_stuff)
+
+passthrough_cmd <- 'ftl' / 'lua' / 'math' / 'raw' / 'script' / 'sil' / 'use' / 'xml'
+
+passthrough_stuff <- { {: passthrough_text / passthrough_debraced_stuff :} }
+
+passthrough_env_stuff <- {: passthrough_env_text :}*
+
+passthrough_text <- { [^{}]+ }
+
+passthrough_env_text <- { (!('\end{' =command '}') .)+ }
+
+passthrough_braced_stuff <- '{' passthrough_stuff '}'
+
+passthrough_debraced_stuff <- { passthrough_braced_stuff }
+
+notpass_end <- '\end{' =command '}' _
+
+pass_end <- '\end{' =command '}' _
+
+_ <- %s*
+
+]=]
+
+local grammar = re.compile(expression, {
+  unescapeSpecials = function (str)
+    -- gsub returns (string, count); the parentheses drop the count so this
+    -- function capture yields exactly one value instead of a stray number.
+    return (str:gsub('\\([{}%%\\])', '%1'))
+  end,
+  cmdID = myID - P"begin" - P"end",
+  parameters = (P"[" * bits.parameters * P"]")^-1 / wrapper,
+  eol = S"\r\n",
+  specials = specials,
+  escaped_specials = P"\\" * specials
+})
+
+-- Match a document string against the compiled grammar via re.match.
+local function parser (string)
+  return re.match(string, grammar)
+end
+
+return parser
diff --git a/inputters/sil.lua b/inputters/sil.lua
index f37fa4e22..4cc8fe4d3 100644
--- a/inputters/sil.lua
+++ b/inputters/sil.lua
@@ -1,14 +1,13 @@
 local base = require("inputters.base")
 
-local epnf = require("epnf")
+local usere = SU.boolean(_G["SIL_USE_RE"], false)
+local parser = require("inputters.sil-" .. (usere and "lpeg-re" or "epnf"))
 
 local inputter = pl.class(base)
 inputter._name = "sil"
 
 inputter.order = 50
 
-inputter._grammar = require("inputters.sil-epnf")
-
 inputter.appropriate = function (round, filename, doc)
   if round == 1 then
     return filename:match(".sil$")
@@ -17,8 +16,7 @@ inputter.appropriate = function (round, filename, doc)
     local promising = sniff:match("\\begin") or sniff:match("\\document") or sniff:match("\\sile")
     return promising and inputter.appropriate(3, filename, doc) or false
   elseif round == 3 then
-    local _parser = epnf.define(inputter._grammar)
-    local status, _ = pcall(epnf.parsestring, _parser, doc)
+    local status, _ = pcall(parser, doc)
     return status
   end
 end
@@ -26,7 +24,7 @@ end
 function inputter:_init ()
   -- Save time when parsing strings by only setting up the grammar once per
   -- instantiation then re-using it on every use.
-  self._parser = self:rebuildParser()
+  self._parser = parser
   base._init(self)
 end
 
@@ -113,12 +111,8 @@ local function massage_ast (tree, doc)
   end
 end
 
-function inputter:rebuildParser ()
-  return epnf.define(self._grammar)
-end
-
 function inputter:parse (doc)
-  local status, result = pcall(epnf.parsestring, self._parser, doc)
+  local status, result = pcall(self._parser, doc)
   if not status then
     return SU.error(([[Unable to parse input document to an AST tree. Parser error:
 