-
Notifications
You must be signed in to change notification settings - Fork 188
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
324 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,320 @@ | ||
exception Syntax_error of (Lexing.position * Lexing.position) * string | ||
|
||
let sign = [%sedlex.regexp? Opt ('+' | '-')] | ||
|
||
let digit = [%sedlex.regexp? '0' .. '9'] | ||
|
||
let hexdigit = [%sedlex.regexp? '0' .. '9' | 'a' .. 'f' | 'A' .. 'F'] | ||
|
||
let num = [%sedlex.regexp? digit, Star (Opt '_', digit)] | ||
|
||
let hexnum = [%sedlex.regexp? hexdigit, Star (Opt '_', hexdigit)] | ||
|
||
let uN = [%sedlex.regexp? num | "0x", hexnum] | ||
|
||
let sN = [%sedlex.regexp? sign, uN] | ||
|
||
let iN = [%sedlex.regexp? uN | sN] | ||
|
||
let float = [%sedlex.regexp? num, Opt ('.', Opt num), Opt (('e' | 'E'), sign, num)] | ||
|
||
let hexfloat = | ||
[%sedlex.regexp? "0x", hexnum, Opt ('.', Opt hexnum), Opt (('p' | 'P'), sign, num)] | ||
|
||
let fN = [%sedlex.regexp? sign, (float | hexfloat | "inf" | "nan" | "nan:", hexnum)] | ||
|
||
let idchar = | ||
[%sedlex.regexp? | ||
( '0' .. '9' | ||
| 'A' .. 'Z' | ||
| 'a' .. 'z' | ||
| '!' | ||
| '#' | ||
| '$' | ||
| '%' | ||
| '&' | ||
| '\'' | ||
| '*' | ||
| '+' | ||
| '-' | ||
| '.' | ||
| '/' | ||
| ':' | ||
| '<' | ||
| '=' | ||
| '>' | ||
| '?' | ||
| '@' | ||
| '\\' | ||
| '^' | ||
| '_' | ||
| '`' | ||
| '|' | ||
| '~' )] | ||
|
||
let id = [%sedlex.regexp? '$', Plus idchar] | ||
|
||
let linechar = [%sedlex.regexp? Sub (any, (10 | 13))] | ||
|
||
let newline = [%sedlex.regexp? 10 | 13 | 13, 10] | ||
|
||
let linecomment = [%sedlex.regexp? ";;", Star linechar, (newline | eof)] | ||
|
||
let format = [%sedlex.regexp? '\n' | 9] | ||
|
||
(* | ||
let space = [%sedlex.regexp? ' ' | format | comment] | ||
*) | ||
let keyword = [%sedlex.regexp? Plus idchar] | ||
|
||
let rec comment lexbuf = | ||
match%sedlex lexbuf with | ||
| ";)" -> () | ||
| "(;" -> | ||
comment lexbuf; | ||
comment lexbuf | ||
| ';' | '(' | Plus (Sub (any, (';' | '('))) -> comment lexbuf | ||
| _ -> | ||
raise | ||
(Syntax_error | ||
(Sedlexing.lexing_positions lexbuf, Printf.sprintf "Malformed comment.\n")) | ||
|
||
let string_buffer = Buffer.create 256 | ||
|
||
let rec string lexbuf = | ||
match%sedlex lexbuf with | ||
| '"' -> | ||
Buffer.add_string string_buffer (Sedlexing.Utf8.lexeme lexbuf); | ||
let s = Buffer.contents string_buffer in | ||
Buffer.clear string_buffer; | ||
s | ||
| Plus (Sub (any, (0 .. 31 | 0x7f | '"' | '\\'))) | ||
| "\\t" | "\\n" | "\\r" | "\\'" | "\\\"" | "\\\\" | ||
| "\\u{", hexnum, "}" -> | ||
Buffer.add_string string_buffer (Sedlexing.Utf8.lexeme lexbuf); | ||
string lexbuf | ||
| _ -> | ||
raise | ||
(Syntax_error | ||
(Sedlexing.lexing_positions lexbuf, Printf.sprintf "Malformed string.\n")) | ||
|
||
type pos = | ||
{ loc : Lexing.position | ||
; byte_loc : int | ||
} | ||
|
||
type token = | ||
| LPAREN | ||
| RPAREN | ||
| ATOM of string | ||
| EOF | ||
|
||
let locs lexbuf = | ||
let loc, loc' = Sedlexing.lexing_positions lexbuf in | ||
let byte_loc, byte_loc' = Sedlexing.bytes_loc lexbuf in | ||
{ loc; byte_loc }, { loc = loc'; byte_loc = byte_loc' } | ||
|
||
let rec token lexbuf = | ||
match%sedlex lexbuf with | ||
| '(' -> LPAREN, locs lexbuf | ||
| ')' -> RPAREN, locs lexbuf | ||
| uN | sN | fN | keyword -> ATOM (Sedlexing.Utf8.lexeme lexbuf), locs lexbuf | ||
| '"' -> | ||
let string_start = | ||
{ loc = Sedlexing.lexing_position_start lexbuf | ||
; byte_loc = Sedlexing.lexeme_bytes_start lexbuf | ||
} | ||
in | ||
Buffer.add_char string_buffer '"'; | ||
let str = string lexbuf in | ||
( ATOM str | ||
, ( string_start | ||
, { loc = Sedlexing.lexing_position_curr lexbuf | ||
; byte_loc = Sedlexing.lexeme_bytes_end lexbuf | ||
} ) ) | ||
| newline | linecomment -> token lexbuf | ||
| Plus (' ' | '\t') -> token lexbuf | ||
| "(;" -> | ||
comment lexbuf; | ||
token lexbuf | ||
| eof -> EOF, locs lexbuf | ||
| _ -> | ||
raise | ||
(Syntax_error (Sedlexing.lexing_positions lexbuf, Printf.sprintf "Syntax error.\n")) | ||
|
||
type t = | ||
{ loc : pos * pos | ||
; desc : desc | ||
} | ||
|
||
and desc = | ||
| Atom of string | ||
| List of t list | ||
|
||
let rec parse_list lexbuf toplevel start_loc acc = | ||
let tok, (loc, loc') = token lexbuf in | ||
match tok with | ||
| LPAREN -> | ||
let lst, loc = parse_list lexbuf false loc [] in | ||
parse_list lexbuf toplevel start_loc ({ desc = List lst; loc } :: acc) | ||
| RPAREN -> | ||
if toplevel | ||
then | ||
raise | ||
(Syntax_error | ||
( Sedlexing.lexing_positions lexbuf | ||
, Printf.sprintf "Missing closing parenthesis.\n" )); | ||
List.rev acc, (start_loc, loc') | ||
| EOF -> | ||
if not toplevel | ||
then | ||
raise | ||
(Syntax_error | ||
( Sedlexing.lexing_positions lexbuf | ||
, Printf.sprintf "Unexpected end of file.\n" )); | ||
List.rev acc, (start_loc, loc') | ||
| ATOM s -> | ||
parse_list lexbuf toplevel start_loc ({ loc = loc, loc'; desc = Atom s } :: acc) | ||
|
||
let parse lexbuf = | ||
parse_list | ||
lexbuf | ||
true | ||
{ loc = Sedlexing.lexing_position_start lexbuf | ||
; byte_loc = Sedlexing.lexeme_bytes_end lexbuf | ||
} | ||
[] | ||
|
||
let report_syntax_error loc msg = | ||
let location = MenhirLib.LexerUtil.range loc in | ||
Format.eprintf "%s%s%!" location msg; | ||
exit 1 | ||
|
||
module StringMap = Map.Make (String) | ||
|
||
type st = | ||
{ text : string | ||
; mutable pos : pos | ||
; variables : bool StringMap.t | ||
; buf : Buffer.t | ||
} | ||
|
||
let write st pos' = | ||
Buffer.add_substring st.buf st.text st.pos.byte_loc (pos'.byte_loc - st.pos.byte_loc); | ||
st.pos <- pos' | ||
|
||
let skip st (pos' : pos) = | ||
let lines = pos'.loc.pos_lnum - st.pos.loc.pos_lnum in | ||
let cols = | ||
pos'.loc.pos_cnum | ||
- pos'.loc.pos_bol | ||
- if lines > 0 then 0 else st.pos.loc.pos_cnum - st.pos.loc.pos_cnum | ||
in | ||
Buffer.add_string st.buf (String.make lines '\n'); | ||
Buffer.add_string st.buf (String.make cols ' '); | ||
st.pos <- pos' | ||
|
||
let pred_position { loc; byte_loc } = | ||
{ loc = { loc with pos_cnum = loc.pos_cnum - 1 }; byte_loc = byte_loc - 1 } | ||
|
||
let eval st cond = | ||
match cond with | ||
| { desc = Atom s; loc = pos, pos' } -> | ||
if not (StringMap.mem s st.variables) | ||
then | ||
raise | ||
(Syntax_error ((pos.loc, pos'.loc), Printf.sprintf "Unknown variable '%s'.\n" s)); | ||
StringMap.find s st.variables | ||
| { loc = pos, pos'; _ } -> | ||
raise (Syntax_error ((pos.loc, pos'.loc), Printf.sprintf "Syntax error.\n")) | ||
|
||
let rec rewrite_list st l = List.iter (rewrite st) l | ||
|
||
and rewrite st elt = | ||
match elt with | ||
| { desc = | ||
List | ||
({ desc = Atom "try"; _ } | ||
:: ( { desc = List ({ desc = Atom "result"; _ } :: _); _ } | ||
:: { desc = List ({ desc = Atom "do"; loc = _, pos_after_do } :: body) | ||
; loc = _, pos_after_body | ||
} | ||
:: _ | ||
| { desc = List ({ desc = Atom "do"; loc = _, pos_after_do } :: body) | ||
; loc = _, pos_after_body | ||
} | ||
:: _ )) | ||
; loc = pos, pos' | ||
} | ||
when StringMap.find "trap-on-exception" st.variables -> | ||
write st pos; | ||
Buffer.add_string st.buf "(block"; | ||
skip st pos_after_do; | ||
rewrite_list st body; | ||
write st pos_after_body; | ||
skip st pos' | ||
| { desc = List ({ desc = Atom "throw"; _ } :: _); loc = pos, pos' } | ||
when StringMap.find "trap-on-exception" st.variables -> | ||
write st pos; | ||
Buffer.add_string st.buf "(unreachable)"; | ||
skip st pos' | ||
| { desc = | ||
List | ||
[ { desc = Atom "#if"; _ } | ||
; cond | ||
; { desc = List ({ desc = Atom "#then"; loc = _, pos_after_then } :: then_body) | ||
; loc = _, pos_after_then_body | ||
} | ||
] | ||
; loc = pos, pos' | ||
} -> | ||
write st pos; | ||
if eval st cond | ||
then ( | ||
skip st pos_after_then; | ||
rewrite_list st then_body; | ||
write st (pred_position pos_after_then_body); | ||
skip st pos') | ||
else skip st pos' | ||
| { desc = | ||
List | ||
[ { desc = Atom "#if"; _ } | ||
; cond | ||
; { desc = List ({ desc = Atom "#then"; loc = _, pos_after_then } :: then_body) | ||
; loc = _, pos_after_then_body | ||
} | ||
; { desc = List ({ desc = Atom "#else"; loc = _, pos_after_else } :: else_body) | ||
; loc = _, pos_after_else_body | ||
} | ||
] | ||
; loc = pos, pos' | ||
} -> | ||
write st pos; | ||
if eval st cond | ||
then ( | ||
skip st pos_after_then; | ||
rewrite_list st then_body; | ||
write st (pred_position pos_after_then_body); | ||
skip st pos') | ||
else ( | ||
skip st pos_after_else; | ||
rewrite_list st else_body; | ||
write st (pred_position pos_after_else_body); | ||
skip st pos') | ||
| { desc = List l; _ } -> rewrite_list st l | ||
| _ -> () | ||
|
||
let f ~variables ~filename ~contents:text = | ||
let variables = ("trap-on-exception", false) :: variables in | ||
let variables = | ||
List.fold_left (fun m (k, v) -> StringMap.add k v m) StringMap.empty variables | ||
in | ||
let lexbuf = Sedlexing.Utf8.from_string text in | ||
Sedlexing.set_filename lexbuf filename; | ||
try | ||
let t, (pos, end_pos) = parse lexbuf in | ||
let st = { text; pos; variables; buf = Buffer.create (String.length text) } in | ||
rewrite_list st t; | ||
write st end_pos; | ||
Buffer.contents st.buf | ||
with Syntax_error (loc, msg) -> report_syntax_error loc msg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
val f : variables:(string * bool) list -> filename:string -> contents:string -> string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters