diff --git a/.gitmodules b/.gitmodules index 0a067f8..bda146c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,9 +7,6 @@ [submodule "submodules/dismantle"] path = submodules/dismantle url = https://github.com/travitch/dismantle.git -[submodule "submodules/what4-serialize"] - path = submodules/what4-serialize - url = https://github.com/GaloisInc/what4-serialize.git [submodule "submodules/what4"] path = submodules/what4 url = https://github.com/GaloisInc/what4 diff --git a/asl-translator.cabal b/asl-translator.cabal index 97b0227..a59c254 100644 --- a/asl-translator.cabal +++ b/asl-translator.cabal @@ -49,7 +49,6 @@ common shared-properties crucible, what4, asl-parser, - what4-serialize, s-cargot, ordered-containers, split, @@ -91,6 +90,7 @@ library Language.ASL.Globals.ConsistencyCheck Data.Parameterized.CtxFuns Data.Parameterized.AssignTree + Data.Parameterized.SomeSome Language.ASL.Formulas.Attach Language.ASL.Formulas Util.Log @@ -103,7 +103,7 @@ library executable asl-translator-exec main-is: Main.hs build-depends: base >=4.10.0.0 && < 5, - what4-serialize, + what4, asl-translator, split, text >= 1 && < 2 @@ -117,7 +117,7 @@ test-suite asl-translator-genarm-test hs-source-dirs: tests/Translation default-language: Haskell2010 build-depends: base >= 4.10.0.0 && < 5, - what4-serialize, + what4, asl-translator main-is: Main.hs @@ -140,7 +140,6 @@ executable asl-translator-concrete-test build-depends: base >= 4.10.0.0 && < 5, bv-sized, libBF >= 0.6 && < 0.7, - what4-serialize, what4, parameterized-utils, asl-translator, diff --git a/cabal.project.newbuild b/cabal.project.newbuild index 23d98f8..6300b58 100644 --- a/cabal.project.newbuild +++ b/cabal.project.newbuild @@ -1,6 +1,5 @@ packages: *.cabal submodules/arm-asl-parser/ - submodules/what4-serialize/ submodules/dismantle/dismantle-tablegen/ submodules/dismantle/dismantle-arm-xml/ submodules/crucible/crucible diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 849cef0..896c2cc 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -10,7 +10,7 @@ a closed-form [What4][fn:what4] function in order to serve as the formal semanti for the AArch32 architecture for the purpose of perform binary analysis on compiled executables. -This document outlines some of the key technical challenges addressed by +This document outlines some of the key technical challenges addressed by the translator # Architecture @@ -52,7 +52,7 @@ instantiation for AArch32. +--------------------------+ semmc-asl ``` # ASL Translation Pipeline -The `asl-translation-exec` executable (`./exe/Main.hs`) and driver library (`Language.ASL.Translation.Driver`) +The `asl-translation-exec` executable (`./exe/Main.hs`) and driver library (`Language.ASL.Translation.Driver`) implements the pipeline to transform the parsed ASL specification into formal semantics (as a collection of What4 functions). Translation proceeds as follows: @@ -170,7 +170,7 @@ bits(32) S_GETTER(integer n) assert n >= 0 && n <= 31; base = (n MOD 4) * 32; return _V[n DIV 4]; - + S_SETTER(integer n, bits(32) value) assert n >= 0 && n <= 31; base = (n MOD 4) * 32; @@ -261,7 +261,7 @@ bits(N) BitReverse(bits(N) data) This function is called by the `CRC32_A` instruction with the contents of some GPR. ``` -acc = R[n]; +acc = R[n]; tempacc = BitReverse(acc):Zeros(size); ``` @@ -431,11 +431,11 @@ After unification, we create an uninterpreted function with the appropriate signature, and with a name that will allow us to later associate it with its translated semantics. The function signature tells us which global variables to provide to the function (i.e. its global reads), and what global variables may -have been modified after its execution (i.e. its global writes). We collect +have been modified after its execution (i.e. its global writes). We collect the global reads from the current register state of the CFG and package them into a single struct. This is passed to the uninterpreted function, along with its natural arguments. The output of the uninterpreted function is then -unpacked into its natural return value, as well as the resulting set of +unpacked into its natural return value, as well as the resulting set of global writes. Each global write is then assigned to the corresponding register state of the CFG, and the natural return value is finally given as the value of the function evaluation. @@ -609,7 +609,7 @@ Along with its Crucible representation, each ASL expression is therefore given some `ExtendedTypeData` which tracks additional type information. In the case of a struct, it contains a mapping from struct names to indexes. When the translator is initialized for a function, an `ExtendedTypeData` is provided -for each argument based on its ASL type. This extended data is propagated +for each argument based on its ASL type. This extended data is propagated during translation, and used to resolve named field accesses. Each ASL expression is also translated under some `TypeConstraint`, representing @@ -708,7 +708,7 @@ bindings that have been calculated so far during unification. In our example, `Zeros(16)` is translated without constraints, since its corresponding formal argument type `bits(N*M)` cannot be fully resolved under -the binding environment `[M := 2]`. +the binding environment `[M := 2]`. Next the type of the resulting Crucible atom is unified against its argument type, potentially discovering bindings for type variables. @@ -728,7 +728,7 @@ unified against the current type constraint. Here we unify `BaseBVType 10` with The function signature is then evaluated in the binding environment to confirm that all bitvector widths have been fully monomorphized. A variant name is derived from the binding environment (e.g. `f_L_2_N_8_M_2`) as the formal -handle for the monomorphized variant of the function, and then translated +handle for the monomorphized variant of the function, and then translated into a function call. The function name and binding environment pair are then recorded as a dependency of the current function, potentially creating a future translation obligation. @@ -794,7 +794,7 @@ ASL instruction or function. Once all the target instructions and dependent functions have been successfully translated and simulated, they are serialized as s-expressions with `Language.ASL.Formulas.Serialize`, which uses -[what4-serialize][fn:what4-serialize] as its backend. +[what4's serialization machinery][fn:what4-serialize] as its backend. The `Language.ASL.Driver` produces two files by default: `functions.what4` and `instructions.what4`, corresponding to the translated and serialized functions from `arm_defs.asl` and the instructions from `arm_instrs.asl` respectively. @@ -838,7 +838,7 @@ For example, a function which takes and returns a single integer `integer f(integer x) := x` would be rewritten to instead use a 65-bit bitvector (able to hold a 64 bit value with an additional sign bit): `bits(65) f_norm(bits(65) x) := x`. The proxy function then simply projects to and from this bitvector to -re-create the original function: +re-create the original function: `integer f(integer x) := sbvToInteger(f_norm(integerToBV(x)))`. This proxy function is then added to the environment instead of the original, @@ -936,7 +936,7 @@ TODO * replacing integers with `bits(65)` everywhere. * rewriting integer arithmetic with guarded bitvector arithmetic * rewriting `integerToBV`, `bvToInteger` and `sbvToInteger` as either -a bitvector truncation, sign-extend or zero-extend as appropriate, with +a bitvector truncation, sign-extend or zero-extend as appropriate, with assertions that information is not thrown away. @@ -960,17 +960,17 @@ after a block of statements has been executed # Outstanding Issues -* EndOfInstruction +* EndOfInstruction - what are the intended semantics of this function? - Does this affect the semantics of normal (user-mode) instructions? * Inlining `getSlice` and `setSlice` for concrete arguments - normalization could avoid the bitshifting implementation in favor of standard bitvector primitives when the bitvector indexes are statically-known - + * Removing dead code (especially old hacks) - hacks for renaming functions with clashing names - + * Use What4 `ConcreteVal` instead of manually-defined `StaticExpr` - Possibly able to use What4 static evaluation? - Reduce risk of inconsistencies between the static ASL semantics and @@ -983,7 +983,7 @@ after a block of statements has been executed [fn:semmc]: https://github.com/GaloisInc/semmc -[fn:what4-serialize]: https://github.com/GaloisInc/what4-serialize/ +[fn:what4-serialize]: https://github.com/GaloisInc/what4/tree/master/what4/src/What4/Serialize [fn:asl-description]: https://alastairreid.github.io/dissecting-ARM-MRA/ [fn:what4]: https://github.com/GaloisInc/crucible/tree/master/what4/ [fn:mra_tools]: https://github.com/alastairreid/mra_tools diff --git a/exe/Concrete.hs b/exe/Concrete.hs index 4f71f95..0b5474d 100644 --- a/exe/Concrete.hs +++ b/exe/Concrete.hs @@ -10,6 +10,7 @@ import qualified Data.BitVector.Sized as BV import qualified Data.Map as M import Data.Parameterized.Nonce import Data.Parameterized.Some ( Some(..) ) +import qualified Data.Parameterized.SomeSome as U import Data.Parameterized.TraversableFC import qualified Data.Ratio as R import qualified Data.Text as T @@ -17,7 +18,6 @@ import qualified LibBF as BF import qualified What4.Interface as WI import qualified What4.Expr as B import qualified What4.Utils.Complex as U -import qualified What4.Utils.Util as U import qualified What4.Utils.Word16String as U import Control.Monad.IO.Class import System.Exit diff --git a/exe/Main.hs b/exe/Main.hs index f0f19d0..a4c4cbf 100644 --- a/exe/Main.hs +++ b/exe/Main.hs @@ -30,7 +30,7 @@ import Language.ASL.Translation.Driver ( TranslatorOptions(..), StatOp ) import qualified Language.ASL.Translation.Driver as ASL -import qualified What4.Utils.Log as Log +import qualified What4.Serialize.Log as Log import Debug.Trace diff --git a/lib/Data/Parameterized/SomeSome.hs b/lib/Data/Parameterized/SomeSome.hs new file mode 100644 index 0000000..0531229 --- /dev/null +++ b/lib/Data/Parameterized/SomeSome.hs @@ -0,0 +1,11 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PolyKinds #-} + +module Data.Parameterized.SomeSome + ( SomeSome(..) + ) where + +import Data.Kind ( Type ) + +-- | Like @Data.Parameterized.Some.Some@, but for doubly-parameterized types. +data SomeSome (f :: k1 -> k2 -> Type) = forall x y. SomeSome (f x y) diff --git a/lib/Language/ASL/Crucible.hs b/lib/Language/ASL/Crucible.hs index d464aaf..9121b84 100644 --- a/lib/Language/ASL/Crucible.hs +++ b/lib/Language/ASL/Crucible.hs @@ -118,7 +118,7 @@ import qualified Language.ASL.Globals as G import qualified Lang.Crucible.CFG.Extension as CCExt -- FIXME: this should be moved somewhere general -import What4.Utils.Log ( HasLogCfg, getLogCfg ) +import What4.Serialize.Log ( HasLogCfg, getLogCfg ) data GenFunction arch innerReads innerWrites outerReads outerWrites init tps = diff --git a/lib/Language/ASL/Formulas.hs b/lib/Language/ASL/Formulas.hs index 518545c..15ef657 100644 --- a/lib/Language/ASL/Formulas.hs +++ b/lib/Language/ASL/Formulas.hs @@ -32,10 +32,10 @@ import qualified System.Directory as D import qualified What4.Expr.Builder as WB import qualified What4.Interface as WI -import What4.Utils.Util ( SomeSome(..) ) import qualified System.IO as IO import qualified Codec.Compression.GZip as GZ +import Data.Parameterized.SomeSome ( SomeSome(..) ) import qualified Language.ASL.Formulas.Serialize as FS import Paths_asl_translator as P diff --git a/lib/Language/ASL/Formulas/Normalize.hs b/lib/Language/ASL/Formulas/Normalize.hs index 79100d2..df4dd5e 100644 --- a/lib/Language/ASL/Formulas/Normalize.hs +++ b/lib/Language/ASL/Formulas/Normalize.hs @@ -116,13 +116,13 @@ import qualified What4.SemiRing as WI import qualified What4.Expr as WB import qualified What4.Expr.Builder as WB import qualified What4.Expr.WeightedSum as WSum -import What4.Utils.Util ( SomeSome(..) ) import qualified What4.Serialize.Printer as WP -- from this package import qualified Language.ASL.Formulas.Serialize as FS import Data.Parameterized.CtxFuns +import Data.Parameterized.SomeSome ( SomeSome(..) ) import qualified What4.Expr.ExprTree as AT import What4.Expr.ExprTree ( withSym, forWithIndex ) diff --git a/lib/Language/ASL/Formulas/Serialize.hs b/lib/Language/ASL/Formulas/Serialize.hs index f70b1d0..9b94e06 100644 --- a/lib/Language/ASL/Formulas/Serialize.hs +++ b/lib/Language/ASL/Formulas/Serialize.hs @@ -71,9 +71,7 @@ import Data.Parameterized.Some ( Some(..) ) import qualified What4.Symbol as WI import qualified What4.Interface as WI import qualified What4.Expr.Builder as WB - -import What4.Utils.Util ( SomeSome(..) ) -import qualified What4.Utils.Util as U +import qualified What4.Utils.Serialize as U import qualified What4.Serialize.Printer as S ( pattern L, pattern A ) import What4.Serialize.Printer ( Atom(..), SExpr ) @@ -81,6 +79,8 @@ import qualified What4.Serialize.Printer as WP import qualified What4.Serialize.Parser as WPD import qualified What4.Serialize.FastSExpr as WSF +import Data.Parameterized.SomeSome ( SomeSome(..) ) + -- | Environment mapping formal names to ExprSymFns type ExprSymFnEnv t = Map T.Text (SomeSome (WB.ExprSymFn t)) diff --git a/lib/Language/ASL/Translation.hs b/lib/Language/ASL/Translation.hs index 0a8836e..5178c1f 100644 --- a/lib/Language/ASL/Translation.hs +++ b/lib/Language/ASL/Translation.hs @@ -94,8 +94,8 @@ import qualified Lang.Crucible.CFG.Reg as CCR import qualified What4.Utils.MonadST as MST import qualified Data.STRef as STRef -import What4.Utils.Log ( MonadHasLogCfg(..), LogCfg, logTrace ) -import qualified What4.Utils.Log as Log +import What4.Serialize.Log ( MonadHasLogCfg(..), LogCfg, logTrace ) +import qualified What4.Serialize.Log as Log import Util.Log ( MonadLog(..), logMsg, logIntToLvl, indentLog, unindentLog ) diff --git a/lib/Language/ASL/Translation/Driver.hs b/lib/Language/ASL/Translation/Driver.hs index 2b2d7ea..e0cb85f 100644 --- a/lib/Language/ASL/Translation/Driver.hs +++ b/lib/Language/ASL/Translation/Driver.hs @@ -124,10 +124,11 @@ import qualified Prettyprinter as LPP import qualified Prettyprinter.Render.String as LPP import qualified Text.PrettyPrint.HughesPJClass as PP -import What4.Utils.Log ( HasLogCfg, LogCfg, withLogCfg ) -import qualified What4.Utils.Log as Log -import What4.Utils.Util ( SomeSome(..) ) -import qualified What4.Utils.Util as U +import qualified Data.Parameterized.SomeSome as U +import Data.Parameterized.SomeSome ( SomeSome(..) ) +import What4.Serialize.Log ( HasLogCfg, LogCfg, withLogCfg ) +import qualified What4.Serialize.Log as Log +import qualified What4.Utils.Serialize as U import Util.Log ( MonadLog(..), logIntToLvl, logMsgStr ) -- | Configuration options controlling translation and simulation diff --git a/lib/Util/Log.hs b/lib/Util/Log.hs index 0b57d19..fc70d20 100644 --- a/lib/Util/Log.hs +++ b/lib/Util/Log.hs @@ -29,7 +29,7 @@ import qualified Control.Monad.Writer as MW import qualified Control.Monad.RWS as RWS import qualified Data.Text as T -import qualified What4.Utils.Log as WLog +import qualified What4.Serialize.Log as WLog -- Simple logging monad transformer/class class Monad m => MonadLog m where diff --git a/submodules/what4 b/submodules/what4 index 835238d..6c462cd 160000 --- a/submodules/what4 +++ b/submodules/what4 @@ -1 +1 @@ -Subproject commit 835238d87500d48c3b0ae9121f2b484b71faa8b5 +Subproject commit 6c462cd46e0ea9ebbfbd6b6ea237984eeb3dc72a diff --git a/submodules/what4-serialize b/submodules/what4-serialize deleted file mode 160000 index d8b3fa7..0000000 --- a/submodules/what4-serialize +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d8b3fa73f7001614c593792aa082e13f4191e110 diff --git a/tests/Translation/Main.hs b/tests/Translation/Main.hs index 0763123..143a3df 100644 --- a/tests/Translation/Main.hs +++ b/tests/Translation/Main.hs @@ -5,7 +5,7 @@ import Language.ASL.Translation.Driver ( TranslatorOptions(..) , FilePathConfig(..), TranslationDepth(..) ) import qualified Language.ASL.Translation.Driver as ASL -import qualified What4.Utils.Log as Log +import qualified What4.Serialize.Log as Log main :: IO ()