Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster multi-solution system #652

Merged
merged 4 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
and continue running, whenever possible
- STATICCALL abstraction is now performed in case of symbolic arguments
- Better error messages for JSON parsing
- Multiple solutions are allowed for a single symbolic expression
- Multiple solutions are allowed for a single symbolic expression, and they are
generated via iterative calls to the SMT solver for quicker solving
- Aliasing works much better for symbolic and concrete addresses
- Constant propagation for symbolic values

Expand Down
8 changes: 4 additions & 4 deletions cli/cli.hs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ data Command w
, solverThreads :: w ::: Maybe Natural <?> "Number of threads for each solver instance. Only respected for Z3 (default: 1)"
, loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
, noDecompose :: w ::: Bool <?> "Don't decompose storage slots into separate arrays"
, maxBranch :: w ::: Int <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
, maxBranch :: w ::: Int <!> "100" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
}
| Equivalence -- prove equivalence between two programs
{ codeA :: w ::: ByteString <?> "Bytecode of the first program"
Expand All @@ -123,7 +123,7 @@ data Command w
, solverThreads :: w ::: Maybe Natural <?> "Number of threads for each solver instance. Only respected for Z3 (default: 1)"
, loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
, noDecompose :: w ::: Bool <?> "Don't decompose storage slots into separate arrays"
, maxBranch :: w ::: Int <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
, maxBranch :: w ::: Int <!> "100" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
}
| Exec -- Execute a given program with specified env & calldata
{ code :: w ::: Maybe ByteString <?> "Program bytecode"
Expand Down Expand Up @@ -173,7 +173,7 @@ data Command w
, maxIterations :: w ::: Maybe Integer <?> "Number of times we may revisit a particular branching point. For no bound, set -1 (default: 5)"
, loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
, noDecompose :: w ::: Bool <?> "Don't decompose storage slots into separate arrays"
, maxBranch :: w ::: Int <!> "10" <?> "Max number of branches to explore when encountering a symbolic value (default: 10)"
, maxBranch :: w ::: Int <!> "100" <?> "Max number of branches to explore when encountering a symbolic value (default: 100)"
, numCexFuzz :: w ::: Integer <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
, askSmtIterations :: w ::: Integer <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
}
Expand Down Expand Up @@ -219,7 +219,7 @@ main = withUtf8 $ do
, numCexFuzz = cmd.numCexFuzz
, dumpTrace = cmd.trace
, decomposeStorage = Prelude.not cmd.noDecompose
, maxNumBranch = cmd.maxBranch
, maxBranch = cmd.maxBranch
} }
case cmd of
Version {} ->putStrLn getFullVersion
Expand Down
4 changes: 3 additions & 1 deletion src/EVM.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3021,7 +3021,9 @@ instance VMOps Symbolic where
Just concVals -> do
assign #result Nothing
case (length concVals) of
0 -> continue Nothing
-- zero solutions means that we are in a branch that's not possible. Revert.
-- TODO: stop execution of the EVM completely
0 -> finishFrame (FrameReverted (ConcreteBuf ""))
1 -> runOne $ head concVals
_ -> runBoth . PleaseRunBoth ewordExpr $ runMore concVals
Nothing -> do
Expand Down
4 changes: 2 additions & 2 deletions src/EVM/Effects.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ data Config = Config
-- Returns Unknown if the Cex cannot be found via fuzzing
, onlyCexFuzz :: Bool
, decomposeStorage :: Bool
, maxNumBranch :: Int
, maxBranch :: Int
}
deriving (Show, Eq)

Expand All @@ -57,7 +57,7 @@ defaultConfig = Config
, numCexFuzz = 10
, onlyCexFuzz = False
, decomposeStorage = True
, maxNumBranch = 10
, maxBranch = 100
}

-- Write to the console
Expand Down
37 changes: 6 additions & 31 deletions src/EVM/Fetch.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,8 @@ import Numeric.Natural (Natural)
import System.Environment (lookupEnv, getEnvironment)
import System.Process
import Control.Monad.IO.Class
import Control.Monad (when)
import EVM.Effects
import qualified EVM.Expr as Expr
import Numeric (showHex,readHex)
import Data.Bits ((.&.))

-- | Abstract representation of an RPC fetch request
data RpcQuery a where
Expand Down Expand Up @@ -252,40 +249,18 @@ getSolutions solvers symExprPreSimp numBytes pathconditions = do
conf <- readConfig
liftIO $ do
let symExpr = Expr.concKeccakSimpExpr symExprPreSimp
when conf.debug $ putStrLn $ "Collecting solutions to symbolic query: " <> show symExpr
ret <- collectSolutions [] conf.maxNumBranch symExpr pathconditions conf
-- when conf.debug $ putStrLn $ "Collecting solutions to symbolic query: " <> show symExpr
ret <- collectSolutions symExpr pathconditions conf
case ret of
Nothing -> pure Nothing
Just r -> case length r of
0 -> pure Nothing
_ -> pure $ Just r
where
createHexValue k =
let hexString = concat (replicate k "ff")
in fst . head $ readHex hexString
collectSolutions :: [W256] -> Int -> Expr EWord -> Prop -> Config -> IO (Maybe [W256])
collectSolutions vals maxSols symExpr conds conf = do
if (length vals > maxSols) then pure Nothing
else
checkSat solvers (assertProps conf [(PEq (Var "addrQuery") symExpr) .&& conds]) >>= \case
Sat (SMTCex vars _ _ _ _ _) -> case (Map.lookup (Var "addrQuery") vars) of
Just v -> do
let hexMask = createHexValue numBytes
maskedVal = v .&. hexMask
cond = (And symExpr (Lit hexMask)) ./= Lit maskedVal
newConds = Expr.simplifyProp $ PAnd conds cond
showedVal = "val: 0x" <> (showHex maskedVal "")
when conf.debug $ putStrLn $ "Got one solution to symbolic query," <> showedVal <> " now have " <>
show (length vals + 1) <> " solution(s), max is: " <> show maxSols
collectSolutions (maskedVal:vals) maxSols symExpr newConds conf
_ -> internalError "No solution to symbolic query"
Unsat -> do
when conf.debug $ putStrLn "No more solution(s) to symbolic query."
pure $ Just vals
-- Error or timeout, we need to be conservative
res -> do
when conf.debug $ putStrLn $ "Symbolic query result is neither SAT nor UNSAT:" <> show res
pure Nothing
collectSolutions :: Expr EWord -> Prop -> Config -> IO (Maybe [W256])
collectSolutions symExpr conds conf = do
let smt2 = assertProps conf [(PEq (Var "multiQueryVar") symExpr) .&& conds]
checkMulti solvers smt2 $ MultiSol { maxSols = conf.maxBranch , numBytes = numBytes , var = "multiQueryVar" }

-- | Checks which branches are satisfiable, checking the pathconditions for consistency
-- if the third argument is true.
Expand Down
179 changes: 136 additions & 43 deletions src/EVM/Solvers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import Data.Map (Map)
import Data.Map qualified as Map
import Data.Maybe (fromMaybe, isJust, fromJust)
import Data.Either (isLeft)
import Data.Text qualified as TStrict
import Data.Text.Lazy (Text)
import Data.Text.Lazy qualified as T
import Data.Text.Lazy.IO qualified as T
Expand All @@ -26,9 +27,12 @@ import System.Process (createProcess, cleanupProcess, proc, ProcessHandle, std_i
import Witch (into)
import EVM.Effects
import EVM.Fuzz (tryCexFuzz)
import Numeric (readHex)
import Data.Bits ((.&.))
import Numeric (showHex)

import EVM.SMT
import EVM.Types (W256, Expr(AbstractBuf), internalError, Err, getError, getNonError)
import EVM.Types

-- | Supported solvers
data Solver
Expand All @@ -55,9 +59,23 @@ data SolverInstance = SolverInstance
-- | A channel representing a group of solvers
newtype SolverGroup = SolverGroup (Chan Task)

data MultiSol = MultiSol
{ maxSols :: Int
, numBytes :: Int
, var :: String
}

-- | A script to be executed, a list of models to be extracted in the case of a sat result, and a channel where the result should be written
data Task = Task
{ script :: SMT2
data Task = TaskSingle SingleData | TaskMulti MultiData

data MultiData = MultiData
{ smt2 :: SMT2
, multiSol :: MultiSol
, resultChan :: Chan (Maybe [W256])
}

data SingleData = SingleData
{ smt2 :: SMT2
, resultChan :: Chan CheckSatResult
}

Expand All @@ -77,21 +95,32 @@ isUnsat :: CheckSatResult -> Bool
isUnsat Unsat = True
isUnsat _ = False

checkMulti :: SolverGroup -> Err SMT2 -> MultiSol -> IO (Maybe [W256])
checkMulti (SolverGroup taskQueue) smt2 multiSol = do
if isLeft smt2 then pure Nothing
else do
-- prepare result channel
resChan <- newChan
-- send task to solver group
writeChan taskQueue (TaskMulti (MultiData (getNonError smt2) multiSol resChan))
-- collect result
readChan resChan

checkSat :: SolverGroup -> Err SMT2 -> IO CheckSatResult
checkSat (SolverGroup taskQueue) script = do
if isLeft script then pure $ Error $ getError script
checkSat (SolverGroup taskQueue) smt2 = do
if isLeft smt2 then pure $ Error $ getError smt2
else do
-- prepare result channel
resChan <- newChan
-- send task to solver group
writeChan taskQueue (Task (getNonError script) resChan)
writeChan taskQueue (TaskSingle (SingleData (getNonError smt2) resChan))
-- collect result
readChan resChan

writeSMT2File :: SMT2 -> Int -> IO ()
writeSMT2File smt2 count =
writeSMT2File :: SMT2 -> String -> IO ()
writeSMT2File smt2 postfix =
let content = formatSMT2 smt2 <> "\n\n(check-sat)"
in T.writeFile ("query-" <> (show count) <> ".smt2") content
in T.writeFile ("query-" <> postfix <> ".smt2") content

withSolvers :: App m => Solver -> Natural -> Natural -> Maybe Natural -> (SolverGroup -> m a) -> m a
withSolvers solver count threads timeout cont = do
Expand All @@ -116,43 +145,107 @@ withSolvers solver count threads timeout cont = do
orchestrate queue avail fileCounter = do
task <- liftIO $ readChan queue
inst <- liftIO $ readChan avail
runTask' <- toIO $ runTask task inst avail fileCounter
runTask' <- case task of
TaskSingle (SingleData smt2 r) -> toIO $ getOneSol smt2 r inst avail fileCounter
TaskMulti (MultiData smt2 multiSol r) -> toIO $ getMultiSol smt2 multiSol r inst avail fileCounter
_ <- liftIO $ forkIO runTask'
orchestrate queue avail (fileCounter + 1)

runTask :: (MonadIO m, ReadConfig m) => Task -> SolverInstance -> Chan SolverInstance -> Int -> m ()
runTask (Task smt2@(SMT2 cmds cexvars ps) r) inst availableInstances fileCounter = do
getMultiSol :: forall m. (MonadIO m, ReadConfig m) => SMT2 -> MultiSol -> (Chan (Maybe [W256])) -> SolverInstance -> Chan SolverInstance -> Int -> m ()
getMultiSol smt2@(SMT2 cmds cexvars _) multiSol r inst availableInstances fileCounter = do
conf <- readConfig
when conf.dumpQueries $ liftIO $ writeSMT2File smt2 (show fileCounter)
-- reset solver and send all lines of provided script
out <- liftIO $ sendScript inst ("(reset)" : cmds)
case out of
Left err -> liftIO $ do
when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
writeChan r Nothing
Right _ -> do
sat <- liftIO $ sendLine inst "(check-sat)"
subRun [] smt2 sat
-- put the instance back in the list of available instances
liftIO $ writeChan availableInstances inst
where
createHexValue k =
let hexString = concat (replicate k "ff")
in fst . head $ readHex hexString
subRun :: (MonadIO m, ReadConfig m) => [W256] -> SMT2 -> Text -> m ()
subRun vals fullSmt sat = do
conf <- readConfig
let fuzzResult = tryCexFuzz ps conf.numCexFuzz
liftIO $ do
when (conf.dumpQueries) $ writeSMT2File smt2 fileCounter
if (isJust fuzzResult)
then do
when (conf.debug) $ putStrLn $ " Cex found via fuzzing:" <> (show fuzzResult)
writeChan r (Sat $ fromJust fuzzResult)
else if not conf.onlyCexFuzz then do
-- reset solver and send all lines of provided script
out <- sendScript inst (SMT2 ("(reset)" : cmds) mempty ps)
case out of
-- if we got an error then return it
Left e -> writeChan r (Error $ "Error while writing SMT to solver: " <> T.unpack e)
-- otherwise call (check-sat), parse the result, and send it down the result channel
Right () -> do
sat <- sendLine inst "(check-sat)"
res <- do
case sat of
"unsat" -> pure Unsat
"timeout" -> pure $ Unknown "Result timeout by SMT solver"
"unknown" -> pure $ Unknown "Result unknown by SMT solver"
"sat" -> Sat <$> getModel inst cexvars
_ -> pure . Error $ "Unable to parse SMT solver output: " <> T.unpack sat
writeChan r res
case sat of
"unsat" -> liftIO $ do
when conf.debug $ putStrLn $ "No more solutions to query, returning: " <> show vals
liftIO $ writeChan r (Just vals)
"timeout" -> liftIO $ do
when conf.debug $ putStrLn "Timeout inside SMT solver."
writeChan r Nothing
"unknown" -> liftIO $ do
when conf.debug $ putStrLn "Unknown result by SMT solver."
writeChan r Nothing
"sat" -> do
if length vals >= multiSol.maxSols then liftIO $ do
when conf.debug $ putStrLn "Too many solutions to symbolic query."
writeChan r Nothing
else do
when (conf.debug) $ putStrLn "Fuzzing failed to find a Cex, not trying SMT due to onlyCexFuzz"
writeChan r $ Error "Option onlyCexFuzz enabled, not running SMT"

-- put the instance back in the list of available instances
writeChan availableInstances inst
cex <- liftIO $ getModel inst cexvars
case Map.lookup (Var (TStrict.pack multiSol.var)) cex.vars of
Just v -> do
let hexMask = createHexValue multiSol.numBytes
maskedVal = v .&. hexMask
toSMT n = show (into n :: Integer)
maskedVar = "(bvand " <> multiSol.var <> " (_ bv" <> toSMT hexMask <> " 256))"
restrict = "(assert (not (= " <> maskedVar <> " (_ bv" <> toSMT maskedVal <> " 256))))"
newSmt = fullSmt <> SMT2 [(fromString restrict)] mempty mempty
when conf.debug $ liftIO $ putStrLn $ "Got one solution to symbolic query, val: 0x" <> (showHex maskedVal "") <>
" now have " <> show (length vals + 1) <> " solution(s), max is: " <> show multiSol.maxSols
when conf.dumpQueries $ liftIO $ writeSMT2File newSmt (show fileCounter <> "-sol" <> show (length vals))
out <- liftIO $ sendLine inst (T.pack restrict)
case out of
"success" -> do
out2 <- liftIO $ sendLine inst (T.pack "(check-sat)")
subRun (maskedVal:vals) newSmt out2
err -> liftIO $ do
when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
writeChan r Nothing
Nothing -> internalError $ "variable " <> multiSol.var <> " not part of model (i.e. cex) ... that's not possible"
err -> liftIO $ do
when conf.debug $ putStrLn $ "Unable to write SMT to solver: " <> (T.unpack err)
writeChan r Nothing

getOneSol :: (MonadIO m, ReadConfig m) => SMT2 -> (Chan CheckSatResult) -> SolverInstance -> Chan SolverInstance -> Int -> m ()
getOneSol smt2@(SMT2 cmds cexvars ps) r inst availableInstances fileCounter = do
conf <- readConfig
let fuzzResult = tryCexFuzz ps conf.numCexFuzz
liftIO $ do
when (conf.dumpQueries) $ writeSMT2File smt2 (show fileCounter)
if (isJust fuzzResult)
then do
when (conf.debug) $ putStrLn $ " Cex found via fuzzing:" <> (show fuzzResult)
writeChan r (Sat $ fromJust fuzzResult)
else if Prelude.not conf.onlyCexFuzz then do
-- reset solver and send all lines of provided script
out <- sendScript inst ("(reset)" : cmds)
case out of
-- if we got an error then return it
Left e -> writeChan r (Error $ "Error while writing SMT to solver: " <> T.unpack e)
-- otherwise call (check-sat), parse the result, and send it down the result channel
Right () -> do
sat <- sendLine inst "(check-sat)"
res <- do
case sat of
"unsat" -> pure Unsat
"timeout" -> pure $ EVM.Solvers.Unknown "Result timeout by SMT solver"
"unknown" -> pure $ EVM.Solvers.Unknown "Result unknown by SMT solver"
"sat" -> Sat <$> getModel inst cexvars
_ -> pure . Error $ "Unable to parse SMT solver output: " <> T.unpack sat
writeChan r res
else do
when (conf.debug) $ putStrLn "Fuzzing failed to find a Cex, not trying SMT due to onlyCexFuzz"
writeChan r $ Error "Option onlyCexFuzz enabled, not running SMT"

-- put the instance back in the list of available instances
writeChan availableInstances inst

getModel :: SolverInstance -> CexVars -> IO SMTCex
getModel inst cexvars = do
Expand Down Expand Up @@ -291,8 +384,8 @@ stopSolver :: SolverInstance -> IO ()
stopSolver (SolverInstance _ stdin stdout process) = cleanupProcess (Just stdin, Just stdout, Nothing, process)

-- | Sends a list of commands to the solver. Returns the first error, if there was one.
sendScript :: SolverInstance -> SMT2 -> IO (Either Text ())
sendScript solver (SMT2 cmds _ _) = do
sendScript :: SolverInstance -> [Builder] -> IO (Either Text ())
sendScript solver cmds = do
let sexprs = splitSExpr $ fmap toLazyText cmds
go sexprs
where
Expand Down
Loading