-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
287 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ depends: [ | |
"ocaml" | ||
"dune" {>= "3.15"} | ||
"base" | ||
"angstrom" | ||
"ppx_deriving" | ||
"odoc" {with-doc} | ||
] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
[@@@ocaml.text "/*"] | ||
|
||
(** Copyright 2024, Andrei, PavlushaSource *) | ||
|
||
(** SPDX-License-Identifier: MIT *) | ||
|
||
[@@@ocaml.text "/*"] | ||
|
||
open! Base | ||
open Angstrom | ||
open LAst | ||
|
||
(* ======= Utils ======= *) | ||
|
||
let unit = return () | ||
|
||
let skip_ws = skip_while Char.is_whitespace | ||
let skip_comments = string "(*" *> many_till any_char (string "*)") *> unit | ||
|
||
let ws = skip_ws *> sep_by skip_ws skip_comments *> unit | ||
let ws1 = skip Char.is_whitespace *> ws | ||
|
||
let parens p = char '(' *> p <* ws <* char ')' | ||
|
||
(* ======= Identifiers ======= *) | ||
(* https://ocaml.org/manual/5.2/lex.html#sss:lex:identifiers *) | ||
|
||
let is_keyword = function | ||
(* https://ocaml.org/manual/5.2/lex.html#sss:keywords *) | ||
| "true" | ||
| "false" | ||
| "match" | ||
| "with" | ||
| "let" | ||
| "rec" | ||
| "and" | ||
| "in" | ||
| "type" | ||
| "function" | ||
| "fun" | ||
| "if" | ||
| "then" | ||
| "else" -> | ||
true | ||
| _ -> | ||
false | ||
|
||
(* XXX: operator keywords? *) | ||
(* let is_keyword_op _ = false *) | ||
|
||
let pident flag = | ||
let is_first = | ||
match flag with | ||
| `LowerCase -> ( | ||
function 'a' .. 'z' | '_' -> true | _ -> false ) | ||
| `Capitalized -> ( | ||
function 'A' .. 'Z' -> true | _ -> false ) | ||
in | ||
|
||
let* first = satisfy is_first >>| String.of_char in | ||
let* rest = | ||
take_while (function | ||
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\'' -> | ||
true | ||
| _ -> | ||
false ) | ||
in | ||
let id = first ^ rest in | ||
if is_keyword id then fail "keyword" else return (Id id) | ||
|
||
let pvalue_id = pident `LowerCase | ||
|
||
let pconstr_id = | ||
let keywords = | ||
choice [string "true"; string "false"; string "()"] >>| fun x -> Id x | ||
in | ||
pident `Capitalized <|> keywords | ||
|
||
(* ======= Constants ======= *) | ||
|
||
let pconst = | ||
let pint = | ||
let* num = take_while Char.is_digit in | ||
match Int.of_string_opt num with | ||
| None -> | ||
fail "not an integer" | ||
| Some x -> | ||
return (ConstInt x) | ||
in | ||
|
||
let pchar = char '\'' *> any_char <* char '\'' >>| fun x -> ConstChar x in | ||
|
||
let pstring = | ||
char '"' *> take_till (Char.( = ) '"') | ||
<* advance 1 | ||
>>| fun x -> ConstString x | ||
in | ||
|
||
choice [pint; pchar; pstring] | ||
|
||
(* ======= Operators ======= *) | ||
|
||
type ('op, 'oprnd) op_kind = | ||
| Prefix of {apply: 'op -> 'oprnd -> 'oprnd} | ||
| Infix of {assoc: [`Left | `Right]; apply: 'op -> 'oprnd -> 'oprnd -> 'oprnd} | ||
|
||
type 'oprnd op_parse = | ||
| Op : | ||
{pop: 'op t; kind: ('op, 'oprnd) op_kind; prhs: ('op -> 'oprnd t) option} | ||
-> 'oprnd op_parse | ||
|
||
type 'oprnd op_parse_table = 'oprnd op_parse list | ||
|
||
let poperators ~(table : 'oprnd op_parse_table) ~(poprnd : 'oprnd t) = | ||
(* Convert the table to lists of infix/prefix parsers | ||
with explicit priorities assigned *) | ||
let _, prefixs, infixs = | ||
List.fold table ~init:(0, [], []) ~f:(fun (prio, prefixs, infixs) (Op op) -> | ||
let prhs = Option.value op.prhs ~default:(fun _ -> poprnd) in | ||
|
||
match op.kind with | ||
| Prefix {apply} -> | ||
let pop = op.pop >>| fun x -> (prio, prhs x, apply x) in | ||
(prio + 1, pop :: prefixs, infixs) | ||
| Infix {assoc; apply} -> | ||
let pop = op.pop >>| fun x -> (assoc, prio, prhs x, apply x) in | ||
(prio + 1, prefixs, pop :: infixs) ) | ||
in | ||
|
||
(* Pratt parser | ||
https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html *) | ||
let rec helper min_prio plhs = | ||
let pprefix = | ||
let* prio, prhs, apply = ws *> choice prefixs in | ||
let* rhs = helper prio prhs in | ||
return (apply rhs) | ||
in | ||
let* lhs = pprefix <|> plhs in | ||
|
||
let pinfix = | ||
let* assoc, prio, prhs, apply = ws *> choice infixs in | ||
let* () = if prio < min_prio then fail "" else unit in | ||
|
||
(* if left assoc then break if next operator has the same priority *) | ||
let prio = match assoc with `Left -> prio + 1 | `Right -> prio in | ||
let* rhs = helper prio prhs in | ||
|
||
return (apply, rhs) | ||
in | ||
(* XXX: recursive parser with acc would be better here | ||
than many + fold *) | ||
many pinfix | ||
>>| List.fold ~init:lhs ~f:(fun acc (apply, oprnd) -> apply acc oprnd) | ||
in | ||
|
||
helper 0 poprnd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
[@@@ocaml.text "/*"] | ||
|
||
(** Copyright 2024, Andrei, PavlushaSource *) | ||
|
||
(** SPDX-License-Identifier: MIT *) | ||
|
||
[@@@ocaml.text "/*"] | ||
|
||
open! Base | ||
open Angstrom | ||
open LAst | ||
|
||
(* ======= Utils ======= *) | ||
val unit : unit t | ||
val ws : unit t | ||
val ws1 : unit t | ||
val parens : 'a t -> 'a t | ||
|
||
(* ======= Identifiers & Constants ======= *) | ||
val pvalue_id : ident t | ||
val pconstr_id : ident t | ||
val pconst : constant t | ||
|
||
(* ======= Operators ======= *) | ||
type ('op, 'oprnd) op_kind = | ||
| Prefix of {apply: 'op -> 'oprnd -> 'oprnd} | ||
| Infix of {assoc: [`Left | `Right]; apply: 'op -> 'oprnd -> 'oprnd -> 'oprnd} | ||
|
||
type 'oprnd op_parse = | ||
| Op : | ||
{ pop: 'op t (** Operator symbol parser *) | ||
; kind: ('op, 'oprnd) op_kind (** Kind of an operator *) | ||
; prhs: ('op -> 'oprnd t) option | ||
(** Optional custom parser for a rhs of an operator *) } | ||
-> 'oprnd op_parse | ||
|
||
(** | ||
Order in a list sets operators' priority. | ||
Last operator in a table has the highest priority | ||
*) | ||
type 'oprnd op_parse_table = 'oprnd op_parse list | ||
|
||
val poperators : table:'oprnd op_parse_table -> poprnd:'oprnd t -> 'oprnd t |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(library | ||
(name LParse) | ||
(public_name NeML.Parse) | ||
(libraries base angstrom LAst)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
[@@@ocaml.text "/*"] | ||
|
||
(** Copyright 2024, Andrei, PavlushaSource *) | ||
|
||
(** SPDX-License-Identifier: MIT *) | ||
|
||
[@@@ocaml.text "/*"] | ||
|
||
(* https://ocaml.org/manual/5.2/patterns.html *) | ||
|
||
open! Base | ||
open Angstrom | ||
open LAst | ||
|
||
open Common | ||
|
||
let pany = char '_' *> return PatAny | ||
let pvar = pvalue_id >>| fun id -> PatVar id | ||
let pconst = pconst >>| fun const -> PatConst const | ||
|
||
(* [Cons (hd, tl)] *) | ||
let pconstruct ppat_single = | ||
let* id = pconstr_id in | ||
let* arg = option None (ppat_single >>| Option.some) in | ||
return (PatConstruct (id, arg)) | ||
|
||
(** [a; b; c] *) | ||
let plist ppat = | ||
let pelements = | ||
sep_by (ws *> char ';') ppat | ||
>>| List.fold_right | ||
~init:(PatConstruct (Id "[]", None)) | ||
~f:(fun pat acc -> PatConstruct (Id "::", Some (PatTuple [pat; acc]))) | ||
in | ||
char '[' *> pelements <* ws <* char ']' | ||
|
||
let ppat_single ppat = | ||
fix (fun ppat_single -> | ||
ws | ||
*> choice | ||
[pconstruct ppat_single; pany; pvar; pconst; plist ppat; parens ppat] ) | ||
|
||
(* ======= Operators ======= *) | ||
|
||
let table = | ||
let apply_or _ lhs rhs = PatOr (lhs, rhs) in | ||
|
||
let apply_tuple _ lhs = function | ||
| PatTuple tl -> | ||
PatTuple (lhs :: tl) | ||
| pat -> | ||
PatTuple [lhs; pat] | ||
in | ||
|
||
let apply_list _ lhs rhs = | ||
PatConstruct (Id "::", Some (PatTuple [lhs; rhs])) | ||
in | ||
|
||
[ Op {pop= string "|"; kind= Infix {assoc= `Left; apply= apply_or}; prhs= None} | ||
; Op | ||
{ pop= string "," | ||
; kind= Infix {assoc= `Right; apply= apply_tuple} | ||
; prhs= None } | ||
; Op | ||
{ pop= string "::" | ||
; kind= Infix {assoc= `Right; apply= apply_list} | ||
; prhs= None } ] | ||
|
||
let ppat = fix (fun ppat -> poperators ~table ~poprnd:(ppat_single ppat)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[@@@ocaml.text "/*"] | ||
|
||
(** Copyright 2024, Andrei, PavlushaSource *) | ||
|
||
(** SPDX-License-Identifier: MIT *) | ||
|
||
[@@@ocaml.text "/*"] | ||
|
||
open! Base | ||
open Angstrom | ||
open LAst | ||
|
||
val ppat : pattern t |