From b472e8cdcd54531b890b8078942a782bb085c13b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Czajka?= <62751+lukaszcz@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:03:27 +0200 Subject: [PATCH] Cairo disassembler (#2710) Implements a disassembler which converts Cairo bytecode into textual CASM representation. Useful for debugging the Cairo backend. * Adds the `juvix dev casm from-cairo` command --- app/Commands/Dev/Casm.hs | 2 + app/Commands/Dev/Casm/FromCairo.hs | 24 +++ app/Commands/Dev/Casm/FromCairo/Options.hs | 15 ++ app/Commands/Dev/Casm/Options.hs | 14 +- .../Compiler/Backend/Cairo/Data/Result.hs | 52 ++++++ .../Backend/Cairo/Extra/Deserialization.hs | 91 ++++++++++ src/Juvix/Compiler/Backend/Cairo/Language.hs | 6 + src/Juvix/Compiler/Casm/Language/Base.hs | 1 + .../Compiler/Casm/Translation/FromCairo.hs | 167 ++++++++++++++++++ 9 files changed, 371 insertions(+), 1 deletion(-) create mode 100644 app/Commands/Dev/Casm/FromCairo.hs create mode 100644 app/Commands/Dev/Casm/FromCairo/Options.hs create mode 100644 src/Juvix/Compiler/Backend/Cairo/Extra/Deserialization.hs create mode 100644 src/Juvix/Compiler/Casm/Translation/FromCairo.hs diff --git a/app/Commands/Dev/Casm.hs b/app/Commands/Dev/Casm.hs index 94a563a1b3..c65c8e1965 100644 --- a/app/Commands/Dev/Casm.hs +++ b/app/Commands/Dev/Casm.hs @@ -2,6 +2,7 @@ module Commands.Dev.Casm where import Commands.Base import Commands.Dev.Casm.Compile as Compile +import Commands.Dev.Casm.FromCairo as FromCairo import Commands.Dev.Casm.Options import Commands.Dev.Casm.Read as Read import Commands.Dev.Casm.Run as Run @@ -11,3 +12,4 @@ runCommand = \case Compile opts -> Compile.runCommand opts Run opts -> Run.runCommand opts Read opts -> Read.runCommand opts + FromCairo opts -> FromCairo.runCommand opts diff --git a/app/Commands/Dev/Casm/FromCairo.hs b/app/Commands/Dev/Casm/FromCairo.hs new file mode 100644 index 0000000000..09df338017 --- /dev/null +++ b/app/Commands/Dev/Casm/FromCairo.hs @@ -0,0 +1,24 @@ +module Commands.Dev.Casm.FromCairo where + +import Commands.Base +import Commands.Dev.Casm.FromCairo.Options +import Data.Aeson +import Data.ByteString.Lazy qualified as BS +import Juvix.Compiler.Backend.Cairo.Extra.Deserialization qualified as Cairo +import Juvix.Compiler.Casm.Data.Result qualified as Casm +import Juvix.Compiler.Casm.Pretty qualified as Casm +import Juvix.Compiler.Casm.Translation.FromCairo qualified as Casm + +runCommand :: forall r. (Members '[EmbedIO, App] r) => CasmFromCairoOptions -> Sem r () +runCommand opts = do + afile :: Path Abs File <- fromAppPathFile file + bs <- liftIO $ BS.readFile (toFilePath afile) + case decode bs of + Just r -> do + let Casm.Result {..} = Casm.fromCairo (Cairo.deserialize r) + renderStdOut (Casm.ppProgram _resultCode) + Nothing -> + exitFailMsg "error reading input file" + where + file :: AppPath File + file = opts ^. casmFromCairoInputFile diff --git a/app/Commands/Dev/Casm/FromCairo/Options.hs b/app/Commands/Dev/Casm/FromCairo/Options.hs new file mode 100644 index 0000000000..415f22c24c --- /dev/null +++ b/app/Commands/Dev/Casm/FromCairo/Options.hs @@ -0,0 +1,15 @@ +module Commands.Dev.Casm.FromCairo.Options where + +import CommonOptions + +newtype CasmFromCairoOptions = CasmFromCairoOptions + { _casmFromCairoInputFile :: AppPath File + } + deriving stock (Data) + +makeLenses ''CasmFromCairoOptions + +parseCasmFromCairoOptions :: Parser CasmFromCairoOptions +parseCasmFromCairoOptions = do + _casmFromCairoInputFile <- parseInputFile FileExtJson + pure CasmFromCairoOptions {..} diff --git a/app/Commands/Dev/Casm/Options.hs b/app/Commands/Dev/Casm/Options.hs index c0ae09050a..4c0f32c214 100644 --- a/app/Commands/Dev/Casm/Options.hs +++ b/app/Commands/Dev/Casm/Options.hs @@ -1,6 +1,7 @@ module Commands.Dev.Casm.Options where import Commands.Dev.Casm.Compile.Options +import Commands.Dev.Casm.FromCairo.Options import Commands.Dev.Casm.Read.Options import Commands.Dev.Casm.Run.Options import CommonOptions @@ -9,6 +10,7 @@ data CasmCommand = Compile CompileOptions | Run CasmRunOptions | Read CasmReadOptions + | FromCairo CasmFromCairoOptions deriving stock (Data) parseCasmCommand :: Parser CasmCommand @@ -17,7 +19,8 @@ parseCasmCommand = mconcat [ commandCompile, commandRun, - commandRead + commandRead, + commandFromCairo ] where commandCompile :: Mod CommandFields CasmCommand @@ -29,6 +32,9 @@ parseCasmCommand = commandRead :: Mod CommandFields CasmCommand commandRead = command "read" readInfo + commandFromCairo :: Mod CommandFields CasmCommand + commandFromCairo = command "from-cairo" fromCairoInfo + compileInfo :: ParserInfo CasmCommand compileInfo = info @@ -46,3 +52,9 @@ parseCasmCommand = info (Read <$> parseCasmReadOptions) (progDesc "Parse a CASM file and pretty print it") + + fromCairoInfo :: ParserInfo CasmCommand + fromCairoInfo = + info + (FromCairo <$> parseCasmFromCairoOptions) + (progDesc "Disassemble Cairo bytecode into CASM") diff --git a/src/Juvix/Compiler/Backend/Cairo/Data/Result.hs b/src/Juvix/Compiler/Backend/Cairo/Data/Result.hs index ba735b34f0..e099119e98 100644 --- a/src/Juvix/Compiler/Backend/Cairo/Data/Result.hs +++ b/src/Juvix/Compiler/Backend/Cairo/Data/Result.hs @@ -1,9 +1,12 @@ module Juvix.Compiler.Backend.Cairo.Data.Result where import Data.Aeson as Aeson hiding (Result) +import Data.Aeson.Key qualified as Aeson +import Data.Aeson.KeyMap qualified as KeyMap import Data.Aeson.Types hiding (Result) import Data.Vector qualified as V import Juvix.Prelude hiding ((.=)) +import Text.Read qualified as T data Result = Result { _resultData :: [Text], @@ -67,3 +70,52 @@ instance ToJSON Result where "reference_ids" .= object [] ] ] + +instance FromJSON Result where + parseJSON val = case val of + Object obj -> do + _resultData <- maybe (return []) parseJSON (KeyMap.lookup "data" obj) + _resultBuiltins <- maybe (return []) parseJSON (KeyMap.lookup "builtins" obj) + (_resultStart, _resultEnd, _resultMain) <- maybe (return (0, 0, 0)) parseIdents (KeyMap.lookup "identifiers" obj) + _resultHints <- maybe (return []) parseHints (KeyMap.lookup "hints" obj) + return Result {..} + _ -> + typeMismatch "Object" val + where + parseIdents :: Value -> Parser (Int, Int, Int) + parseIdents val' = case val' of + Object obj -> do + startPc <- maybe (return 0) parseIdentPc (KeyMap.lookup "__main__.__start__" obj) + endPc <- maybe (return 0) parseIdentPc (KeyMap.lookup "__main__.__end__" obj) + mainPc <- maybe (return 0) parseIdentPc (KeyMap.lookup "__main__.main" obj) + return (startPc, endPc, mainPc) + _ -> + typeMismatch "Object" val' + + parseIdentPc :: Value -> Parser Int + parseIdentPc val' = case val' of + Object obj -> + maybe (return 0) parseJSON (KeyMap.lookup "pc" obj) + _ -> + typeMismatch "Object" val' + + parseHints :: Value -> Parser [(Int, Text)] + parseHints val' = case val' of + Object obj -> do + lst <- + forM (KeyMap.toList obj) $ \(k, v) -> do + v' <- parseHint v + return (Aeson.toText k, v') + mapM (firstM (maybe (typeMismatch "Integer" val') return . T.readMaybe . unpack)) lst + _ -> + typeMismatch "Object" val' + + parseHint :: Value -> Parser Text + parseHint val' = case val' of + Array arr -> case toList arr of + [Object obj] -> + maybe (return "") parseJSON (KeyMap.lookup "code" obj) + _ -> + typeMismatch "singleton Array" val' + _ -> + typeMismatch "singleton Array" val' diff --git a/src/Juvix/Compiler/Backend/Cairo/Extra/Deserialization.hs b/src/Juvix/Compiler/Backend/Cairo/Extra/Deserialization.hs new file mode 100644 index 0000000000..705401778f --- /dev/null +++ b/src/Juvix/Compiler/Backend/Cairo/Extra/Deserialization.hs @@ -0,0 +1,91 @@ +module Juvix.Compiler.Backend.Cairo.Extra.Deserialization where + +import Data.Bits +import Juvix.Compiler.Backend.Cairo.Data.Result +import Juvix.Compiler.Backend.Cairo.Language +import Numeric + +deserialize :: Result -> [Element] +deserialize Result {..} = go [] (map (fromHexText . unpack) _resultData) + where + fromHexText :: String -> Natural + fromHexText s + | isPrefixOf "0x" s = case readHex (drop 2 s) of + [(n, "")] -> n + _ -> error ("error parsing field element: " <> pack s) + | otherwise = error ("not a hexadecimal number: " <> pack s) + + go :: [Element] -> [Natural] -> [Element] + go acc = \case + [] -> + reverse acc + e : elems -> + case instr ^. instrOp1Src of + Op1SrcImm -> case elems of + [] -> error "expected an immediate" + e' : elems' -> + go (ElementImmediate f : ElementInstruction instr : acc) elems' + where + f = fieldFromInteger cairoFieldSize (toInteger e') + _ -> + go (ElementInstruction instr : acc) elems + where + instr = + Instruction + { _instrOffDst = fromBiasedRepr (e .&. 0xFFFF), + _instrOffOp0 = fromBiasedRepr (shiftR e 16 .&. 0xFFFF), + _instrOffOp1 = fromBiasedRepr (shiftR e 32 .&. 0xFFFF), + _instrDstReg = goReg (testBit e 48), + _instrOp0Reg = goReg (testBit e 49), + _instrOp1Src = goOp1Src (shiftR e 50 .&. 0x7), + _instrResLogic = goResLogic (shiftR e 53 .&. 0x3), + _instrPcUpdate = goPcUpdate (shiftR e 55 .&. 0x7), + _instrApUpdate = goApUpdate (shiftR e 58 .&. 0x3), + _instrOpcode = goOpcode (shiftR e 60 .&. 0x7) + } + + fromBiasedRepr :: Natural -> Offset + fromBiasedRepr e = fromIntegral (toInteger e - (2 :: Integer) ^ (15 :: Integer)) + + goReg :: Bool -> Reg + goReg = \case + False -> Ap + True -> Fp + + goOp1Src :: Natural -> Op1Src + goOp1Src = \case + 0 -> Op1SrcOp0 + 1 -> Op1SrcImm + 2 -> Op1SrcFp + 4 -> Op1SrcAp + _ -> error "invalide op1_src" + + goResLogic :: Natural -> ResLogic + goResLogic = \case + 0 -> ResOp1 + 1 -> ResAdd + 2 -> ResMul + _ -> error "invalid res_logic" + + goPcUpdate :: Natural -> PcUpdate + goPcUpdate = \case + 0 -> PcUpdateRegular + 1 -> PcUpdateJump + 2 -> PcUpdateJumpRel + 4 -> PcUpdateJnz + _ -> error "invalid pc_update" + + goApUpdate :: Natural -> ApUpdate + goApUpdate = \case + 0 -> ApUpdateRegular + 1 -> ApUpdateAdd + 2 -> ApUpdateInc + _ -> error "invalid ap_update" + + goOpcode :: Natural -> Opcode + goOpcode = \case + 0 -> Nop + 1 -> Call + 2 -> Ret + 4 -> AssertEq + _ -> error "invalid opcode" diff --git a/src/Juvix/Compiler/Backend/Cairo/Language.hs b/src/Juvix/Compiler/Backend/Cairo/Language.hs index 18587290f3..8012617ea6 100644 --- a/src/Juvix/Compiler/Backend/Cairo/Language.hs +++ b/src/Juvix/Compiler/Backend/Cairo/Language.hs @@ -41,34 +41,40 @@ data Instruction = Instruction _instrApUpdate :: ApUpdate, _instrOpcode :: Opcode } + deriving stock (Show) data Op1Src = Op1SrcOp0 | Op1SrcImm | Op1SrcFp | Op1SrcAp + deriving stock (Eq, Show) data ResLogic = ResOp1 | ResAdd | ResMul + deriving stock (Eq, Show) data PcUpdate = PcUpdateRegular | PcUpdateJump | PcUpdateJumpRel | PcUpdateJnz + deriving stock (Eq, Show) data ApUpdate = ApUpdateRegular | ApUpdateAdd | ApUpdateInc + deriving stock (Eq, Show) data Opcode = Nop | Call | Ret | AssertEq + deriving stock (Eq, Show) defaultInstruction :: Instruction defaultInstruction = diff --git a/src/Juvix/Compiler/Casm/Language/Base.hs b/src/Juvix/Compiler/Casm/Language/Base.hs index 84946d7e37..29de20502b 100644 --- a/src/Juvix/Compiler/Casm/Language/Base.hs +++ b/src/Juvix/Compiler/Casm/Language/Base.hs @@ -13,3 +13,4 @@ type Address = Int data Reg = Ap | Fp + deriving stock (Eq, Show) diff --git a/src/Juvix/Compiler/Casm/Translation/FromCairo.hs b/src/Juvix/Compiler/Casm/Translation/FromCairo.hs new file mode 100644 index 0000000000..5375fa6750 --- /dev/null +++ b/src/Juvix/Compiler/Casm/Translation/FromCairo.hs @@ -0,0 +1,167 @@ +module Juvix.Compiler.Casm.Translation.FromCairo where + +import Juvix.Compiler.Backend.Cairo.Language qualified as Cairo +import Juvix.Compiler.Casm.Data.Result +import Juvix.Compiler.Casm.Language + +fromCairo :: [Cairo.Element] -> Result +fromCairo elems0 = Result mempty (go 0 [] elems0) + where + errorMsg :: Address -> Text -> a + errorMsg addr msg = error ("error at address " <> show addr <> ": " <> msg) + + go :: Address -> [Instruction] -> [Cairo.Element] -> [Instruction] + go addr acc = \case + [] -> reverse acc + Cairo.ElementInstruction instr : elems -> + go (addr + delta + 1) (i : acc) (drop delta elems) + where + (i, delta) = goInstr addr instr elems + Cairo.ElementImmediate {} : _ -> errorMsg addr "cannot disassemble an immediate value" + Cairo.ElementHint {} : _ -> errorMsg addr "cannot disassemble a hint" + + goInstr :: Address -> Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goInstr addr instr1 elems1 = case instr1 ^. Cairo.instrOpcode of + Cairo.Call -> goCall instr1 elems1 + Cairo.Ret -> goRet instr1 elems1 + Cairo.AssertEq -> goAssertEq instr1 elems1 + Cairo.Nop -> goNop instr1 elems1 + where + getImm :: Cairo.Element -> Immediate + getImm = \case + Cairo.ElementImmediate imm -> Cairo.fieldToInteger imm + _ -> errorMsg (addr + 1) "expected an immediate value" + + getOp1Val :: RValue -> Value + getOp1Val = \case + Val val -> val + _ -> errorMsg addr ("unexpected op1 load: " <> show instr1) + + decodeDst :: Cairo.Instruction -> MemRef + decodeDst Cairo.Instruction {..} = MemRef _instrDstReg _instrOffDst + + decodeRes :: Cairo.Instruction -> [Cairo.Element] -> (RValue, Int) + decodeRes Cairo.Instruction {..} elems = (res, delta) + where + delta = if _instrOp1Src == Cairo.Op1SrcImm then 1 else 0 + op0 = MemRef _instrOp0Reg _instrOffOp0 + op1 = case _instrOp1Src of + Cairo.Op1SrcOp0 -> Load $ LoadValue op0 _instrOffOp1 + Cairo.Op1SrcAp -> Val $ Ref $ MemRef Ap _instrOffOp1 + Cairo.Op1SrcFp -> Val $ Ref $ MemRef Fp _instrOffOp1 + Cairo.Op1SrcImm -> Val $ Imm $ getImm (head' elems) + res = case _instrResLogic of + Cairo.ResOp1 -> op1 + Cairo.ResAdd -> Binop $ BinopValue FieldAdd op0 (getOp1Val op1) + Cairo.ResMul -> Binop $ BinopValue FieldMul op0 (getOp1Val op1) + + goCall :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goCall i@Cairo.Instruction {..} elems + | (_instrPcUpdate == Cairo.PcUpdateJump || _instrPcUpdate == Cairo.PcUpdateJumpRel) + && _instrApUpdate == Cairo.ApUpdateRegular + && _instrDstReg == Ap + && _instrOffDst == 0 = + (call, delta) + | otherwise = + errorMsg addr ("invalid call: " <> show i) + where + call = + Call + InstrCall + { _instrCallRel = _instrPcUpdate == Cairo.PcUpdateJumpRel, + _instrCallTarget = val + } + (rval, delta) = decodeRes i elems + val = case rval of + Val v -> v + _ -> errorMsg addr ("invalid call: " <> show i) + + goRet :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goRet i@Cairo.Instruction {..} _ + | _instrApUpdate == Cairo.ApUpdateRegular + && _instrPcUpdate == Cairo.PcUpdateJump + && _instrResLogic == Cairo.ResOp1 + && _instrOp1Src == Cairo.Op1SrcFp + && _instrOffOp1 == -1 + && _instrDstReg == Fp + && _instrOffDst == -2 = + (Return, 0) + | otherwise = + errorMsg addr ("invalid ret: " <> show i) + + goAssertEq :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goAssertEq i@Cairo.Instruction {..} elems + | (_instrApUpdate == Cairo.ApUpdateInc || _instrApUpdate == Cairo.ApUpdateRegular) + && _instrPcUpdate == Cairo.PcUpdateRegular = + (asng, delta) + | otherwise = + errorMsg addr ("invalid assignment: " <> show i) + where + (rval, delta) = decodeRes i elems + dst = decodeDst i + asng = + Assign + InstrAssign + { _instrAssignResult = dst, + _instrAssignValue = rval, + _instrAssignIncAp = _instrApUpdate == Cairo.ApUpdateInc + } + + goNop :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goNop i@Cairo.Instruction {..} elems = + case _instrPcUpdate of + Cairo.PcUpdateJnz -> goJumpIf i elems + Cairo.PcUpdateJump -> goJump False i elems + Cairo.PcUpdateJumpRel -> goJump True i elems + Cairo.PcUpdateRegular -> case _instrApUpdate of + Cairo.ApUpdateAdd -> goAlloc i elems + _ -> errorMsg addr ("cannot disassemble: " <> show i) + + goJump :: Bool -> Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goJump isRel i@Cairo.Instruction {..} elems + | (_instrApUpdate == Cairo.ApUpdateInc || _instrApUpdate == Cairo.ApUpdateRegular) = + (jmp, delta) + | otherwise = + errorMsg addr ("invalid jump: " <> show i) + where + (res, delta) = decodeRes i elems + jmp = + Jump + InstrJump + { _instrJumpTarget = res, + _instrJumpRel = isRel, + _instrJumpIncAp = _instrApUpdate == Cairo.ApUpdateInc + } + + goJumpIf :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goJumpIf i@Cairo.Instruction {..} elems + | (_instrApUpdate == Cairo.ApUpdateInc || _instrApUpdate == Cairo.ApUpdateRegular) = + case res of + Val val -> + (jmp val, delta) + _ -> + errorMsg addr ("cannot disassemble conditional jump: " <> show i) + | otherwise = + errorMsg addr ("invalid conditional jump: " <> show i) + where + (res, delta) = decodeRes i elems + dst = decodeDst i + + jmp :: Value -> Instruction + jmp tgt = + JumpIf + InstrJumpIf + { _instrJumpIfTarget = tgt, + _instrJumpIfValue = dst, + _instrJumpIfIncAp = _instrApUpdate == Cairo.ApUpdateInc + } + + goAlloc :: Cairo.Instruction -> [Cairo.Element] -> (Instruction, Int) + goAlloc i elems = (alloc, delta) + where + (res, delta) = decodeRes i elems + alloc = + Alloc + InstrAlloc + { _instrAllocSize = res + }