Faster multi-solution system

ethereum · Feb 12, 2025 · b2d59b6 · b2d59b6
1 parent 1bd1ba4
commit b2d59b6
Show file tree

Hide file tree

Showing 5 changed files with 150 additions and 81 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,7 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   and continue running, whenever possible
 - STATICCALL abstraction is now performed in case of symbolic arguments
 - Better error messages for JSON parsing
-- Multiple solutions are allowed for a single symbolic expression
+- Multiple solutions are allowed for a single symbolic expression, and they are
+  generated via iterative calls to the SMT solver for quicker solving
 - Aliasing works much better for symbolic and concrete addresses
 - Constant propagation for symbolic values
 

diff --git a/cli/cli.hs b/cli/cli.hs
@@ -102,7 +102,7 @@ data Command w
       , solverThreads :: w ::: Maybe Natural      <?> "Number of threads for each solver instance. Only respected for Z3 (default: 1)"
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , noDecompose   :: w ::: Bool               <?> "Don't decompose storage slots into separate arrays"
-      , maxBranch     :: w ::: Int                <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
+      , maxBranch     :: w ::: Int                <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
       }
   | Equivalence -- prove equivalence between two programs
       { codeA         :: w ::: ByteString       <?> "Bytecode of the first program"
@@ -123,7 +123,7 @@ data Command w
       , solverThreads :: w ::: Maybe Natural    <?> "Number of threads for each solver instance. Only respected for Z3 (default: 1)"
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , noDecompose   :: w ::: Bool             <?> "Don't decompose storage slots into separate arrays"
-      , maxBranch     :: w ::: Int              <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
+      , maxBranch     :: w ::: Int              <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
       }
   | Exec -- Execute a given program with specified env & calldata
       { code        :: w ::: Maybe ByteString  <?> "Program bytecode"
@@ -173,7 +173,7 @@ data Command w
       , maxIterations :: w ::: Maybe Integer            <?> "Number of times we may revisit a particular branching point. For no bound, set -1 (default: 5)"
       , loopDetectionHeuristic :: w ::: LoopHeuristic   <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , noDecompose   :: w ::: Bool                     <?> "Don't decompose storage slots into separate arrays"
-      , maxBranch     :: w ::: Int                      <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
+      , maxBranch     :: w ::: Int                      <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
       , numCexFuzz    :: w ::: Integer                  <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
       , askSmtIterations :: w ::: Integer               <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
       }
@@ -219,7 +219,7 @@ main = withUtf8 $ do
     , numCexFuzz = cmd.numCexFuzz
     , dumpTrace = cmd.trace
     , decomposeStorage = Prelude.not cmd.noDecompose
-    , maxNumBranch = cmd.maxBranch
+    , maxBranch = cmd.maxBranch
     } }
   case cmd of
     Version {} ->putStrLn getFullVersion

diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs
@@ -43,7 +43,7 @@ data Config = Config
    -- Returns Unknown if the Cex cannot be found via fuzzing
   , onlyCexFuzz      :: Bool
   , decomposeStorage :: Bool
-  , maxNumBranch     :: Int
+  , maxBranch     :: Int
   }
   deriving (Show, Eq)
 
@@ -57,7 +57,7 @@ defaultConfig = Config
   , numCexFuzz = 10
   , onlyCexFuzz  = False
   , decomposeStorage = True
-  , maxNumBranch = 10
+  , maxBranch = 100
   }
 
 -- Write to the console

diff --git a/src/EVM/Fetch.hs b/src/EVM/Fetch.hs
@@ -27,11 +27,8 @@ import Numeric.Natural (Natural)
 import System.Environment (lookupEnv, getEnvironment)
 import System.Process
 import Control.Monad.IO.Class
-import Control.Monad (when)
 import EVM.Effects
 import qualified EVM.Expr as Expr
-import Numeric (showHex,readHex)
-import Data.Bits ((.&.))
 
 -- | Abstract representation of an RPC fetch request
 data RpcQuery a where
@@ -252,40 +249,18 @@ getSolutions solvers symExprPreSimp numBytes pathconditions = do
   conf <- readConfig
   liftIO $ do
     let symExpr = Expr.concKeccakSimpExpr symExprPreSimp
-    when conf.debug $ putStrLn $ "Collecting solutions to symbolic query: " <> show symExpr
-    ret <- collectSolutions [] conf.maxNumBranch symExpr pathconditions conf
+    -- when conf.debug $ putStrLn $ "Collecting solutions to symbolic query: " <> show symExpr
+    ret <- collectSolutions symExpr pathconditions conf
     case ret of
       Nothing -> pure Nothing
       Just r -> case length r of
         0 -> pure Nothing
         _ -> pure $ Just r
     where
-      createHexValue k =
-          let hexString = concat (replicate k "ff")
-          in fst . head $ readHex hexString
-      collectSolutions :: [W256] -> Int -> Expr EWord -> Prop -> Config -> IO (Maybe [W256])
-      collectSolutions vals maxSols symExpr conds conf = do
-        if (length vals > maxSols) then pure Nothing
-        else
-          checkSat solvers (assertProps conf [(PEq (Var "addrQuery") symExpr) .&& conds]) >>= \case
-            Sat (SMTCex vars _ _ _ _ _)  -> case (Map.lookup (Var "addrQuery") vars) of
-              Just v -> do
-                let hexMask = createHexValue numBytes
-                    maskedVal = v .&. hexMask
-                    cond = (And symExpr (Lit hexMask)) ./= Lit maskedVal
-                    newConds = Expr.simplifyProp $ PAnd conds cond
-                    showedVal = "val: 0x" <> (showHex maskedVal "")
-                when conf.debug $ putStrLn $ "Got one solution to symbolic query," <> showedVal <> " now have " <>
-                  show (length vals + 1) <> " solution(s), max is: " <> show maxSols
-                collectSolutions (maskedVal:vals) maxSols symExpr newConds conf
-              _ -> internalError "No solution to symbolic query"
-            Unsat -> do
-              when conf.debug $ putStrLn "No more solution(s) to symbolic query."
-              pure $ Just vals
-            -- Error or timeout, we need to be conservative
-            res -> do
-              when conf.debug $ putStrLn $ "Symbolic query result is neither SAT nor UNSAT:" <> show res
-              pure Nothing
+      collectSolutions :: Expr EWord -> Prop -> Config -> IO (Maybe [W256])
+      collectSolutions symExpr conds conf = do
+        let smt2 = assertProps conf [(PEq (Var "multiQueryVar") symExpr) .&& conds]
+        checkMulti solvers smt2 $ MultiSol { maxSols = conf.maxBranch , numBytes = numBytes , var = "multiQueryVar" }
 
 -- | Checks which branches are satisfiable, checking the pathconditions for consistency
 -- if the third argument is true.

diff --git a/src/EVM/Solvers.hs b/src/EVM/Solvers.hs
@@ -18,6 +18,7 @@ import Data.Map (Map)
 import Data.Map qualified as Map
 import Data.Maybe (fromMaybe, isJust, fromJust)
 import Data.Either (isLeft)
+import Data.Text qualified as TStrict
 import Data.Text.Lazy (Text)
 import Data.Text.Lazy qualified as T
 import Data.Text.Lazy.IO qualified as T
@@ -26,9 +27,12 @@ import System.Process (createProcess, cleanupProcess, proc, ProcessHandle, std_i
 import Witch (into)
 import EVM.Effects
 import EVM.Fuzz (tryCexFuzz)
+import Numeric (readHex)
+import Data.Bits ((.&.))
+import Numeric (showHex)
 
 import EVM.SMT
-import EVM.Types (W256, Expr(AbstractBuf), internalError, Err, getError, getNonError)
+import EVM.Types
 
 -- | Supported solvers
 data Solver
@@ -55,9 +59,23 @@ data SolverInstance = SolverInstance
 -- | A channel representing a group of solvers
 newtype SolverGroup = SolverGroup (Chan Task)
 
+data MultiSol = MultiSol
+  { maxSols :: Int
+  , numBytes :: Int
+  , var :: String
+  }
+
 -- | A script to be executed, a list of models to be extracted in the case of a sat result, and a channel where the result should be written
-data Task = Task
-  { script :: SMT2
+data Task = TaskSingle SingleData | TaskMulti MultiData
+
+data MultiData = MultiData
+  { smt2 :: SMT2
+  , multiSol :: MultiSol
+  , resultChan :: Chan (Maybe [W256])
+  }
+
+data SingleData = SingleData
+  { smt2 :: SMT2
   , resultChan :: Chan CheckSatResult
   }
 
@@ -77,21 +95,32 @@ isUnsat :: CheckSatResult -> Bool
 isUnsat Unsat = True
 isUnsat _ = False
 
+checkMulti :: SolverGroup -> Err SMT2 -> MultiSol -> IO (Maybe [W256])
+checkMulti (SolverGroup taskQueue) smt2 multiSol = do
+  if isLeft smt2 then pure Nothing
+  else do
+    -- prepare result channel
+    resChan <- newChan
+    -- send task to solver group
+    writeChan taskQueue (TaskMulti (MultiData (getNonError smt2) multiSol resChan))
+    -- collect result
+    readChan resChan
+
 checkSat :: SolverGroup -> Err SMT2 -> IO CheckSatResult
-checkSat (SolverGroup taskQueue) script = do
-  if isLeft script then pure $ Error $ getError script
+checkSat (SolverGroup taskQueue) smt2 = do
+  if isLeft smt2 then pure $ Error $ getError smt2
   else do
     -- prepare result channel
     resChan <- newChan
     -- send task to solver group
-    writeChan taskQueue (Task (getNonError script) resChan)
+    writeChan taskQueue (TaskSingle (SingleData (getNonError smt2) resChan))
     -- collect result
     readChan resChan
 
-writeSMT2File :: SMT2 -> Int -> IO ()
-writeSMT2File smt2 count =
+writeSMT2File :: SMT2 -> String -> IO ()
+writeSMT2File smt2 postfix =
     let content = formatSMT2 smt2 <> "\n\n(check-sat)"
-    in T.writeFile ("query-" <> (show count) <> ".smt2") content
+    in T.writeFile ("query-" <> postfix <> ".smt2") content
 
 withSolvers :: App m => Solver -> Natural -> Natural -> Maybe Natural -> (SolverGroup -> m a) -> m a
 withSolvers solver count threads timeout cont = do
@@ -116,43 +145,107 @@ withSolvers solver count threads timeout cont = do
     orchestrate queue avail fileCounter = do
       task <- liftIO $ readChan queue
       inst <- liftIO $ readChan avail
-      runTask' <- toIO $ runTask task inst avail fileCounter
+      runTask' <- case task of
+        TaskSingle (SingleData smt2 r) -> toIO $ getOneSol smt2 r inst avail fileCounter
+        TaskMulti (MultiData smt2 multiSol r) -> toIO $ getMultiSol smt2 multiSol r inst avail fileCounter
       _ <- liftIO $ forkIO runTask'
       orchestrate queue avail (fileCounter + 1)
 
-    runTask :: (MonadIO m, ReadConfig m) => Task -> SolverInstance -> Chan SolverInstance -> Int -> m ()
-    runTask (Task smt2@(SMT2 cmds cexvars ps) r) inst availableInstances fileCounter = do
+getMultiSol :: forall m. (MonadIO m, ReadConfig m) => SMT2 -> MultiSol -> (Chan (Maybe [W256])) -> SolverInstance -> Chan SolverInstance -> Int -> m ()
+getMultiSol smt2@(SMT2 cmds cexvars _) multiSol r inst availableInstances fileCounter = do
+  conf <- readConfig
+  when conf.dumpQueries $ liftIO $ writeSMT2File smt2 (show fileCounter)
+  -- reset solver and send all lines of provided script
+  out <- liftIO $ sendScript inst ("(reset)" : cmds)
+  case out of
+    Left err -> liftIO $ do
+      when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
+      writeChan r Nothing
+    Right _ -> do
+      sat <- liftIO $ sendLine inst "(check-sat)"
+      subRun [] smt2 sat
+  -- put the instance back in the list of available instances
+  liftIO $ writeChan availableInstances inst
+  where
+    createHexValue k =
+       let hexString = concat (replicate k "ff")
+       in fst . head $ readHex hexString
+    subRun :: (MonadIO m, ReadConfig m) => [W256] -> SMT2 -> Text -> m ()
+    subRun vals fullSmt sat = do
       conf <- readConfig
-      let fuzzResult = tryCexFuzz ps conf.numCexFuzz
-      liftIO $ do
-        when (conf.dumpQueries) $ writeSMT2File smt2 fileCounter
-        if (isJust fuzzResult)
-          then do
-            when (conf.debug) $ putStrLn $ "   Cex found via fuzzing:" <> (show fuzzResult)
-            writeChan r (Sat $ fromJust fuzzResult)
-          else if not conf.onlyCexFuzz then do
-            -- reset solver and send all lines of provided script
-            out <- sendScript inst (SMT2 ("(reset)" : cmds) mempty ps)
-            case out of
-              -- if we got an error then return it
-              Left e -> writeChan r (Error $ "Error while writing SMT to solver: " <> T.unpack e)
-              -- otherwise call (check-sat), parse the result, and send it down the result channel
-              Right () -> do
-                sat <- sendLine inst "(check-sat)"
-                res <- do
-                    case sat of
-                      "unsat" -> pure Unsat
-                      "timeout" -> pure $ Unknown "Result timeout by SMT solver"
-                      "unknown" -> pure $ Unknown "Result unknown by SMT solver"
-                      "sat" -> Sat <$> getModel inst cexvars
-                      _ -> pure . Error $ "Unable to parse SMT solver output: " <> T.unpack sat
-                writeChan r res
+      case sat of
+        "unsat" -> liftIO $ do
+          when conf.debug $ putStrLn $ "No more solutions to query, returning: " <> show vals
+          liftIO $ writeChan r (Just vals)
+        "timeout" -> liftIO $ do
+           when conf.debug $ putStrLn "Timeout inside SMT solver."
+           writeChan r Nothing
+        "unknown" -> liftIO $ do
+           when conf.debug $ putStrLn "Unknown result by SMT solver."
+           writeChan r Nothing
+        "sat" -> do
+          if length vals >= multiSol.maxSols then liftIO $ do
+            when conf.debug $ putStrLn "Too many solutions to symbolic query."
+            writeChan r Nothing
           else do
-            when (conf.debug) $ putStrLn "Fuzzing failed to find a Cex, not trying SMT due to onlyCexFuzz"
-            writeChan r $ Error "Option onlyCexFuzz enabled, not running SMT"
-
-        -- put the instance back in the list of available instances
-        writeChan availableInstances inst
+            cex <- liftIO $ getModel inst cexvars
+            case Map.lookup (Var (TStrict.pack multiSol.var)) cex.vars of
+              Just v -> do
+                let hexMask = createHexValue multiSol.numBytes
+                    maskedVal = v .&. hexMask
+                    toSMT n = show (into n :: Integer)
+                    maskedVar = "(bvand " <> multiSol.var <> " (_ bv" <> toSMT hexMask <> " 256))"
+                    restrict = "(assert (not (= " <> maskedVar <> " (_ bv" <> toSMT maskedVal <> " 256))))"
+                    newSmt = fullSmt <> SMT2 [(fromString restrict)] mempty mempty
+                when conf.debug $ liftIO $ putStrLn $ "Got one solution to symbolic query, val: 0x" <> (showHex maskedVal "") <>
+                  " now have " <> show (length vals + 1) <> " solution(s), max is: " <> show multiSol.maxSols
+                when conf.dumpQueries $ liftIO $ writeSMT2File newSmt (show fileCounter <> "-sol" <> show (length vals))
+                out <- liftIO $ sendLine inst (T.pack restrict)
+                case out of
+                  "success" -> do
+                    out2 <- liftIO $ sendLine inst  (T.pack "(check-sat)")
+                    subRun (maskedVal:vals) newSmt out2
+                  err -> liftIO $ do
+                    when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
+                    writeChan r Nothing
+              Nothing -> internalError $ "variable " <>  multiSol.var <> " not part of model (i.e. cex) ... that's not possible"
+        err -> liftIO $ do
+          when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
+          writeChan r Nothing
+
+getOneSol :: (MonadIO m, ReadConfig m) => SMT2 -> (Chan CheckSatResult) -> SolverInstance -> Chan SolverInstance -> Int -> m ()
+getOneSol smt2@(SMT2 cmds cexvars ps) r inst availableInstances fileCounter = do
+  conf <- readConfig
+  let fuzzResult = tryCexFuzz ps conf.numCexFuzz
+  liftIO $ do
+    when (conf.dumpQueries) $ writeSMT2File smt2 (show fileCounter)
+    if (isJust fuzzResult)
+      then do
+        when (conf.debug) $ putStrLn $ "   Cex found via fuzzing:" <> (show fuzzResult)
+        writeChan r (Sat $ fromJust fuzzResult)
+      else if Prelude.not conf.onlyCexFuzz then do
+        -- reset solver and send all lines of provided script
+        out <- sendScript inst ("(reset)" : cmds)
+        case out of
+          -- if we got an error then return it
+          Left e -> writeChan r (Error $ "Error while writing SMT to solver: " <> T.unpack e)
+          -- otherwise call (check-sat), parse the result, and send it down the result channel
+          Right () -> do
+            sat <- sendLine inst "(check-sat)"
+            res <- do
+                case sat of
+                  "unsat" -> pure Unsat
+                  "timeout" -> pure $ EVM.Solvers.Unknown "Result timeout by SMT solver"
+                  "unknown" -> pure $ EVM.Solvers.Unknown "Result unknown by SMT solver"
+                  "sat" -> Sat <$> getModel inst cexvars
+                  _ -> pure . Error $ "Unable to parse SMT solver output: " <> T.unpack sat
+            writeChan r res
+      else do
+        when (conf.debug) $ putStrLn "Fuzzing failed to find a Cex, not trying SMT due to onlyCexFuzz"
+        writeChan r $ Error "Option onlyCexFuzz enabled, not running SMT"
+
+    -- put the instance back in the list of available instances
+    writeChan availableInstances inst
 
 getModel :: SolverInstance -> CexVars -> IO SMTCex
 getModel inst cexvars = do
@@ -291,8 +384,8 @@ stopSolver :: SolverInstance -> IO ()
 stopSolver (SolverInstance _ stdin stdout process) = cleanupProcess (Just stdin, Just stdout, Nothing, process)
 
 -- | Sends a list of commands to the solver. Returns the first error, if there was one.
-sendScript :: SolverInstance -> SMT2 -> IO (Either Text ())
-sendScript solver (SMT2 cmds _ _) = do
+sendScript :: SolverInstance -> [Builder] -> IO (Either Text ())
+sendScript solver cmds = do
   let sexprs = splitSExpr $ fmap toLazyText cmds
   go sexprs
   where