Skip to content

Commit

Permalink
feat: patterns parser
Browse files Browse the repository at this point in the history
  • Loading branch information
therain7 committed Oct 19, 2024
1 parent c9f132b commit 81e5c35
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 1 deletion.
1 change: 1 addition & 0 deletions NeML.opam
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ depends: [
"ocaml"
"dune" {>= "3.15"}
"base"
"angstrom"
"ppx_deriving"
"odoc" {with-doc}
]
Expand Down
2 changes: 1 addition & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
(name NeML)
(synopsis "NeML compiler")
(description "A compiler for some non-existent programming language")
(depends ocaml dune base ppx_deriving))
(depends ocaml dune base angstrom ppx_deriving))
156 changes: 156 additions & 0 deletions lib/parse/common.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
[@@@ocaml.text "/*"]

(** Copyright 2024, Andrei, PavlushaSource *)

(** SPDX-License-Identifier: MIT *)

[@@@ocaml.text "/*"]

open! Base
open Angstrom
open LAst

(* ======= Utils ======= *)

let unit = return ()

let skip_ws = skip_while Char.is_whitespace
let skip_comments = string "(*" *> many_till any_char (string "*)") *> unit

let ws = skip_ws *> sep_by skip_ws skip_comments *> unit
let ws1 = skip Char.is_whitespace *> ws

let parens p = char '(' *> p <* ws <* char ')'

(* ======= Identifiers ======= *)
(* https://ocaml.org/manual/5.2/lex.html#sss:lex:identifiers *)

let is_keyword = function
(* https://ocaml.org/manual/5.2/lex.html#sss:keywords *)
| "true"
| "false"
| "match"
| "with"
| "let"
| "rec"
| "and"
| "in"
| "type"
| "function"
| "fun"
| "if"
| "then"
| "else" ->
true
| _ ->
false

(* XXX: operator keywords? *)
(* let is_keyword_op _ = false *)

let pident flag =
let is_first =
match flag with
| `LowerCase -> (
function 'a' .. 'z' | '_' -> true | _ -> false )
| `Capitalized -> (
function 'A' .. 'Z' -> true | _ -> false )
in

let* first = satisfy is_first >>| String.of_char in
let* rest =
take_while (function
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\'' ->
true
| _ ->
false )
in
let id = first ^ rest in
if is_keyword id then fail "keyword" else return (Id id)

let pvalue_id = pident `LowerCase

let pconstr_id =
let keywords =
choice [string "true"; string "false"; string "()"] >>| fun x -> Id x
in
pident `Capitalized <|> keywords

(* ======= Constants ======= *)

let pconst =
let pint =
let* num = take_while Char.is_digit in
match Int.of_string_opt num with
| None ->
fail "not an integer"
| Some x ->
return (ConstInt x)
in

let pchar = char '\'' *> any_char <* char '\'' >>| fun x -> ConstChar x in

let pstring =
char '"' *> take_till (Char.( = ) '"')
<* advance 1
>>| fun x -> ConstString x
in

choice [pint; pchar; pstring]

(* ======= Operators ======= *)

type ('op, 'oprnd) op_kind =
| Prefix of {apply: 'op -> 'oprnd -> 'oprnd}
| Infix of {assoc: [`Left | `Right]; apply: 'op -> 'oprnd -> 'oprnd -> 'oprnd}

type 'oprnd op_parse =
| Op :
{pop: 'op t; kind: ('op, 'oprnd) op_kind; prhs: ('op -> 'oprnd t) option}
-> 'oprnd op_parse

type 'oprnd op_parse_table = 'oprnd op_parse list

let poperators ~(table : 'oprnd op_parse_table) ~(poprnd : 'oprnd t) =
(* Convert the table to lists of infix/prefix parsers
with explicit priorities assigned *)
let _, prefixs, infixs =
List.fold table ~init:(0, [], []) ~f:(fun (prio, prefixs, infixs) (Op op) ->
let prhs = Option.value op.prhs ~default:(fun _ -> poprnd) in

match op.kind with
| Prefix {apply} ->
let pop = op.pop >>| fun x -> (prio, prhs x, apply x) in
(prio + 1, pop :: prefixs, infixs)
| Infix {assoc; apply} ->
let pop = op.pop >>| fun x -> (assoc, prio, prhs x, apply x) in
(prio + 1, prefixs, pop :: infixs) )
in

(* Pratt parser
https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html *)
let rec helper min_prio plhs =
let pprefix =
let* prio, prhs, apply = ws *> choice prefixs in
let* rhs = helper prio prhs in
return (apply rhs)
in
let* lhs = pprefix <|> plhs in

let pinfix =
let* assoc, prio, prhs, apply = ws *> choice infixs in
let* () = if prio < min_prio then fail "" else unit in

(* if left assoc then break if next operator has the same priority *)
let prio = match assoc with `Left -> prio + 1 | `Right -> prio in
let* rhs = helper prio prhs in

return (apply, rhs)
in
(* XXX: recursive parser with acc would be better here
than many + fold *)
many pinfix
>>| List.fold ~init:lhs ~f:(fun acc (apply, oprnd) -> apply acc oprnd)
in

helper 0 poprnd
43 changes: 43 additions & 0 deletions lib/parse/common.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[@@@ocaml.text "/*"]

(** Copyright 2024, Andrei, PavlushaSource *)

(** SPDX-License-Identifier: MIT *)

[@@@ocaml.text "/*"]

open! Base
open Angstrom
open LAst

(* ======= Utils ======= *)
val unit : unit t
val ws : unit t
val ws1 : unit t
val parens : 'a t -> 'a t

(* ======= Identifiers & Constants ======= *)
val pvalue_id : ident t
val pconstr_id : ident t
val pconst : constant t

(* ======= Operators ======= *)
type ('op, 'oprnd) op_kind =
| Prefix of {apply: 'op -> 'oprnd -> 'oprnd}
| Infix of {assoc: [`Left | `Right]; apply: 'op -> 'oprnd -> 'oprnd -> 'oprnd}

type 'oprnd op_parse =
| Op :
{ pop: 'op t (** Operator symbol parser *)
; kind: ('op, 'oprnd) op_kind (** Kind of an operator *)
; prhs: ('op -> 'oprnd t) option
(** Optional custom parser for a rhs of an operator *) }
-> 'oprnd op_parse

(**
Order in a list sets operators' priority.
Last operator in a table has the highest priority
*)
type 'oprnd op_parse_table = 'oprnd op_parse list

val poperators : table:'oprnd op_parse_table -> poprnd:'oprnd t -> 'oprnd t
4 changes: 4 additions & 0 deletions lib/parse/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(library
(name LParse)
(public_name NeML.Parse)
(libraries base angstrom LAst))
69 changes: 69 additions & 0 deletions lib/parse/pat.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
[@@@ocaml.text "/*"]

(** Copyright 2024, Andrei, PavlushaSource *)

(** SPDX-License-Identifier: MIT *)

[@@@ocaml.text "/*"]

(* https://ocaml.org/manual/5.2/patterns.html *)

open! Base
open Angstrom
open LAst

open Common

let pany = char '_' *> return PatAny
let pvar = pvalue_id >>| fun id -> PatVar id
let pconst = pconst >>| fun const -> PatConst const

(* [Cons (hd, tl)] *)
let pconstruct ppat_single =
let* id = pconstr_id in
let* arg = option None (ppat_single >>| Option.some) in
return (PatConstruct (id, arg))

(** [a; b; c] *)
let plist ppat =
let pelements =
sep_by (ws *> char ';') ppat
>>| List.fold_right
~init:(PatConstruct (Id "[]", None))
~f:(fun pat acc -> PatConstruct (Id "::", Some (PatTuple [pat; acc])))
in
char '[' *> pelements <* ws <* char ']'

let ppat_single ppat =
fix (fun ppat_single ->
ws
*> choice
[pconstruct ppat_single; pany; pvar; pconst; plist ppat; parens ppat] )

(* ======= Operators ======= *)

let table =
let apply_or _ lhs rhs = PatOr (lhs, rhs) in

let apply_tuple _ lhs = function
| PatTuple tl ->
PatTuple (lhs :: tl)
| pat ->
PatTuple [lhs; pat]
in

let apply_list _ lhs rhs =
PatConstruct (Id "::", Some (PatTuple [lhs; rhs]))
in

[ Op {pop= string "|"; kind= Infix {assoc= `Left; apply= apply_or}; prhs= None}
; Op
{ pop= string ","
; kind= Infix {assoc= `Right; apply= apply_tuple}
; prhs= None }
; Op
{ pop= string "::"
; kind= Infix {assoc= `Right; apply= apply_list}
; prhs= None } ]

let ppat = fix (fun ppat -> poperators ~table ~poprnd:(ppat_single ppat))
13 changes: 13 additions & 0 deletions lib/parse/pat.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[@@@ocaml.text "/*"]

(** Copyright 2024, Andrei, PavlushaSource *)

(** SPDX-License-Identifier: MIT *)

[@@@ocaml.text "/*"]

open! Base
open Angstrom
open LAst

val ppat : pattern t

0 comments on commit 81e5c35

Please sign in to comment.