Skip to content

Commit

Permalink
Extend s-expr DSL for better pattern-matching and improve Parser perf…
Browse files Browse the repository at this point in the history
…ormance. (#139)
  • Loading branch information
abdoo8080 authored Nov 17, 2024
1 parent 4803bab commit a02090d
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 103 deletions.
190 changes: 110 additions & 80 deletions Smt/Data/Sexp.lean
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,30 @@
Copyright (c) 2021-2022 by the authors listed in the file AUTHORS and their
institutional affiliations. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Wojciech Nawrocki
Authors: Wojciech Nawrocki, Abdalrhman Mohamed, Henrik Böving
-/

import Std.Internal.Parsec.String

/-- The type of S-expressions. -/
inductive Sexp where
| atom : String → Sexp
| expr : List Sexp → Sexp
deriving Repr, BEq, Inhabited
deriving Repr, BEq, Inhabited

class ToSexp (α : Type u) where
toSexp : α → Sexp

namespace Sexp

def isAtom : Sexp → Bool
| atom _ => true
| _ => false

def isExpr : Sexp → Bool
| expr _ => true
| _ => false

partial def serialize : Sexp → String
| atom s => s
| expr ss => "(" ++ (" ".intercalate <| ss.map serialize) ++ ")"
Expand All @@ -26,87 +36,107 @@ def serializeMany (ss : List Sexp) : String :=
instance : ToString Sexp :=
⟨serialize⟩

inductive ParseError where
| /-- Incomplete input, for example missing a closing parenthesis. -/
incomplete (msg : String)
| /-- Malformed input, for example having too many closing parentheses. -/
malformed (msg : String)
namespace Parser

instance : ToString ParseError where
toString
| .incomplete msg => s!"incomplete input: {msg}"
| .malformed msg => s!"malformed input: {msg}"
open Std.Internal.Parsec String

/-- Tokenize `s` with the s-expression grammar. Supported token kinds are more or less as in
/-- Parse the s-expression grammar. Supported token kinds are more or less as in
https://smtlib.cs.uiowa.edu/papers/smt-lib-reference-v2.6-r2021-05-12.pdf:
- parentheses `(`/`)`
- symbols `abc`
- quoted symbols `|abc|`
- string literals `"abc"` -/
partial def tokenize (s : Substring) : Except ParseError (Array Substring) :=
go #[] s
where go (stk : Array Substring) (s : Substring) :=
-- Note: not written using `do` notation to ensure tail-call recursion
if s.isEmpty then .ok stk
else
let c := s.front
if c == '"' || c == '|' then
let s1 := s.drop 1 |>.takeWhile (· ≠ c)
if s1.stopPos = s.stopPos then
throw <| .incomplete s!"ending {c} missing after {s1}"
else
let s1 := ⟨s.str, s.startPos, s.next s1.stopPos⟩
let s2 := ⟨s.str, s1.stopPos, s.stopPos⟩
go (stk.push s1) s2
else if c == ')' || c == '(' then
go (stk.push <| s.take 1) (s.drop 1)
else if c.isWhitespace then
go stk (s.drop 1)
else
let tk := s.takeWhile fun c =>
!c.isWhitespace && c != '(' && c != ')' && c != '|' && c != '"'
-- assertion: tk.bsize > 0 as otherwise we would have gone into one of the branches above
go (stk.push tk) (s.extract ⟨tk.bsize⟩ ⟨s.bsize⟩)

mutual
partial def parseOneAux : List Substring → Except ParseError (Sexp × List Substring)
| tk :: tks => do
if tk.front == ')' then
throw <| .malformed "unexpected ')'"
if tk.front == '(' then
if let (ss, _tk :: tks) ← parseManyAux tks then
-- assertion: _tk == ')' since parseManyAux only stops on ')'
return (expr ss.toList, tks)
else
throw <| .incomplete "expected ')'"
else
return (atom tk.toString, tks)
| [] => throw <| .incomplete "expected a token, got none"

partial def parseManyAux : List Substring → Except ParseError (Array Sexp × List Substring) :=
go #[]
where go (stk : Array Sexp)
| tk :: tks => do
if tk.front == ')' then .ok (stk, tk :: tks)
else
let (e, tks) ← parseOneAux (tk :: tks)
go (stk.push e) tks
| [] => .ok (stk, [])
end

/-- Parse all the s-expressions in the given string. For example, `"(abc) (def)"` contains two. -/
def parseMany (s : String) : Except ParseError (List Sexp) := do
let tks ← tokenize s.toSubstring
let (sexps, tks) ← parseManyAux tks.toList
if !tks.isEmpty then
throw <| .malformed s!"unexpected '{tks.get! 0}'"
return sexps.toList

/-- Parse a single s-expression. Note that the string may contain extra data, but parsing will
succeed as soon as the single s-exp is complete. -/
def parseOne (s : String) : Except ParseError Sexp := do
let tks ← tokenize s.toSubstring
let (sexp, _) ← parseOneAux tks.toList
return sexp

end Sexp
- string literals `"abc"`
- comments `; abc`
-/

def comment : Parser Unit := do
skipChar ';'
discard <| many (satisfy (· != '\n'))
skipChar '\n' <|> eof

def misc : Parser Unit := do
ws
discard <| many (comment *> ws)

def strLit : Parser String := do
let cstart ← pchar '"'
let s ← manyCharsCore (satisfy (· ≠ '"')) cstart.toString
let cend ← pchar '"'
return s.push cend

def quotedSym : Parser String := do
let cstart ← pchar '|'
let s ← manyCharsCore (satisfy (· ≠ '|')) cstart.toString
let cend ← pchar '|'
return s.push cend

def sym : Parser String :=
let filter c := !c.isWhitespace && c != '(' && c != ')' && c != '|' && c != '"' && c != ';'
many1Chars (satisfy filter)

def atom : Parser Sexp := do
let a ← strLit <|> quotedSym <|> sym
return Sexp.atom a

/--
Parse all the s-expressions in the given string. For example, `"(abc) (def)"`
contains two. Note that the string may contain extra data, but parsing will
always succeed.
-/
def manySexps : Parser (List Sexp) := do
let mut stack : List (List Sexp) := []
let mut curr := []
let mut next ← misc *> peek?
while h : next.isSome do
match next.get h with
| '(' =>
skipChar '('
stack := curr :: stack
curr := []
| ')' =>
match stack with
| [] =>
return curr.reverse
| sexp :: sexps =>
skipChar ')'
stack := sexps
curr := .expr curr.reverse :: sexp
| _ =>
curr := (← atom) :: curr
next ← misc *> peek?
if !stack.isEmpty then
fail "expected ')'"
return curr.reverse

def expr : Parser Sexp := do
skipChar '('
let sexps ← manySexps
skipChar ')'
return Sexp.expr sexps

/--
Parse a single s-expression. Note that the string may contain extra data, but
parsing will succeed as soon as a single s-expr is complete.
-/
def sexp : Parser Sexp :=
atom <|> expr

/--
Parse all the s-expressions in the given string. For example, `"(abc) (def)"`
contains two. Parsing fails if there is any extra data after the last s-expr.
-/
def manySexps! : Parser (List Sexp) := do
let sexps ← manySexps
eof
return sexps

/--
Parse a single s-expression. Parsing fails if there is any extra data after the
s-expr.
-/
def sexp! : Parser Sexp := do
let s ← sexp
eof
return s

end Sexp.Parser
76 changes: 57 additions & 19 deletions Smt/Dsl/Sexp.lean
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Copyright (c) 2021-2022 by the authors listed in the file AUTHORS and their
institutional affiliations. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Wojciech Nawrocki
Authors: Wojciech Nawrocki, Abdalrhman Mohamed
-/

import Lean.PrettyPrinter.Formatter
Expand Down Expand Up @@ -36,36 +36,74 @@ instance : ToSexp String := ⟨Sexp.atom⟩
instance [ToSexp α] : CoeOut α Sexp := ⟨ToSexp.toSexp⟩

declare_syntax_cat sexp
declare_syntax_cat slist

syntax generalIdent : sexp
syntax "(" sexp* ")" : sexp
syntax "(" sexp* "...{" term "}" sexp* ")" : sexp
syntax "(" ")" : sexp
syntax "(" slist ")" : sexp
syntax "{" term "}" : sexp

-- This coercion is justified by the macro expansions below.
instance : Coe (Lean.TSyntax `sexp) (Lean.TSyntax `term) where
coe a := ⟨a.raw⟩
syntax sexp : slist
syntax "...{" term "}" : slist
syntax sexp slist : slist
syntax "...{" term "}" slist : slist

syntax "sexp!{" sexp "}" : term
syntax "slist!{" slist "}" : term

macro_rules
| `(sexp| $a:generalIdent) => `(Sexp.atom $(Lean.quote a.getGeneralId))
| `(sexp| ( $ss:sexp* )) => `(Sexp.expr [ $ss,* ])
| `(sexp| ( $ss:sexp* ...{ $t:term } $ts:sexp* )) => `(Sexp.expr <| [ $ss,* ] ++ ($t : List Sexp) ++ [ $ts,* ])
| `(sexp| { $t:term }) => `($t)
| `(slist| $s:sexp) => `([sexp!{$s}])
| `(slist| ...{ $t:term }) => `(($t : List Sexp))
| `(slist| $s:sexp $ss:slist) => `(sexp!{$s} :: slist!{$ss})
| `(slist| ...{ $t:term } $ss:slist) => `(($t : List Sexp) ++ slist!{$ss})

syntax "sexp!{" sexp "}" : term
macro_rules
| `(sexp| $a:generalIdent) => `(Sexp.atom $(Lean.quote a.getGeneralId))
| `(sexp| ()) => `(Sexp.expr [])
| `(sexp| ( $ss:slist )) => `(Sexp.expr slist!{$ss})
| `(sexp| { $t:term }) => `(($t : Sexp))

/-- S-expressions can be written using `sexp!{...}` syntax. -/
macro_rules
| `(sexp!{ $s:sexp }) => `($s)
| `(sexp!{ $s:sexp }) => `(sexp|$s)
| `(slist!{ $ss:slist }) => `(slist|$ss)

syntax "sexps!{" sexp* "}" : term
syntax "sexps!{" sexp* "...{" term "}" sexp* "}" : term
syntax "sexps!{" "}" : term
syntax "sexps!{" slist "}" : term
macro_rules
| `(sexps!{ $ss:sexp* }) => do
let ss ← ss.mapM fun s => `(sexp!{ $s })
`([ $[$ss],* ])
| `(sexps!{ $ss:sexp* ...{ $t:term } $ts:sexp* }) =>
`([ $[$ss],* ] ++ ($t : List Sexp) ++ [ $[$ts],* ])
| `(sexps!{ }) => `(([] : List Sexp))
| `(sexps!{ $ss:slist }) => `(slist!{$ss})

instance : Repr Sexp where
reprPrec s _ := s!"sexp!\{{toString s}}"

/-- info: sexp!{()} -/
#guard_msgs in #eval sexp!{()}
/-- info: sexp!{foo} -/
#guard_msgs in #eval sexp!{foo}
/-- info: sexp!{foo} -/
#guard_msgs in #eval sexp!{{Sexp.atom "foo"}}
/-- info: sexp!{(foo)} -/
#guard_msgs in #eval sexp!{(foo)}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(foo bar)}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(foo {Sexp.atom "bar"})}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{({Sexp.atom "foo"} bar)}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(foo ...{[Sexp.atom "bar"]})}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(...{[Sexp.atom "foo"]} bar)}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(...{[Sexp.atom "foo"]} ...{[Sexp.atom "bar"]})}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{(...{[Sexp.atom "foo", Sexp.atom "bar"]})}
/-- info: sexp!{(foo bar)} -/
#guard_msgs in #eval sexp!{{Sexp.expr [Sexp.atom "foo", Sexp.atom "bar"]}}
/-- info: [] -/
#guard_msgs in #eval sexps!{}
/-- info: [sexp!{()}] -/
#guard_msgs in #eval sexps!{()}
/-- info: [sexp!{()}, sexp!{()}] -/
#guard_msgs in #eval sexps!{() ()}
8 changes: 4 additions & 4 deletions Smt/Translate/Solver.lean
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ private def getSexp : SolverT m Sexp := do
let state ← get
let mut line ← state.proc.stdout.getLine
let mut out := line
let mut parseRes := Sexp.parseOne out
while !line.isEmpty && parseRes matches .error (.incomplete _) do
let mut parseRes := Sexp.Parser.sexp.run out
while !line.isEmpty && parseRes matches .error _ do
line ← state.proc.stdout.getLine
out := out ++ line
parseRes := Sexp.parseOne out
parseRes := Sexp.Parser.sexp.run out
match parseRes with
| .ok sexp!{(error {.atom e})} => (throw (IO.userError (unquote e)) : IO _)
| .ok sexp => return sexp
Expand All @@ -108,7 +108,7 @@ private def getSexp : SolverT m Sexp := do
(throw (IO.userError (parseErrMsg e out err)) : IO _)
where
unquote s := s.extract ⟨1⟩ ⟨s.length - 1
parseErrMsg (e : Sexp.ParseError) (out err : String) :=
parseErrMsg (e : String) (out err : String) :=
s!"could not parse solver output: {e}\nsolver stdout:\n{out}\nsolver stderr:\n{err}"

/-- Create an instance of a generic SMT solver.
Expand Down

0 comments on commit a02090d

Please sign in to comment.