Skip to content
This repository was archived by the owner on Feb 14, 2025. It is now read-only.

Clean up API #12

Merged
merged 6 commits into from
Nov 26, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
First pass cleaning up the API
Josh Watson committed Oct 27, 2018
commit 861930b9c4719a6cf38cfb09896f9753fc735224
304 changes: 0 additions & 304 deletions evm_cfg_builder/cfg.py

This file was deleted.

188 changes: 188 additions & 0 deletions evm_cfg_builder/cfg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import basic_block
import function

import re
from pyevmasm import disassemble_all

__all__ = ["basic_block", "function"]

# Mnemonics that close a basic block: once one of these is the last
# instruction of the block being built, a fresh block is started.
BASIC_BLOCK_END = [
    'STOP', 'SELFDESTRUCT', 'RETURN', 'REVERT',
    'INVALID', 'SUICIDE', 'JUMP', 'JUMPI',
]

class ImmutableDict(dict):
    '''
    Read-only copy of a mapping.

    The content is copied from ``map`` when the object is created; every
    mutating operation afterwards raises instead of changing the content.
    Item-style mutation (``d[k] = v``, ``del d[k]``) raises ``KeyError``,
    method-style mutation raises ``NotImplementedError``.
    '''

    def __init__(self, map):
        # Fill through the base class: the overrides below would refuse.
        dict.__init__(self)
        dict.update(self, map)

    def __setitem__(self, key, value):
        raise KeyError('ImmutableDict is immutable.')

    def __delitem__(self, key):
        # Same exception type as __setitem__ so both item-style
        # mutations fail the same way.
        raise KeyError('ImmutableDict is immutable.')

    def update(self, *args, **kwargs):
        raise NotImplementedError()

    def pop(self, *args, **kwargs):
        raise NotImplementedError()

    def popitem(self):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    def setdefault(self, *args, **kwargs):
        raise NotImplementedError()

class CFG(object):
    '''
    Analysis state for one piece of bytecode.

    Holds the bytecode together with the instructions, basic blocks and
    functions computed from it. Assigning new bytecode discards every
    previously computed result.
    '''

    # Metadata blob appended to the bytecode, see
    # http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode
    _METADATA_PATTERN = (
        r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29'
    )

    def __init__(self, bytecode=None):
        '''
        Args:
            bytecode: bytecode to analyze; empty bytes when omitted
        '''
        # clear() is the single place that defines the empty state, so
        # the containers always have the same types (functions is a
        # list because compute_functions/add_function append to it).
        self.clear()
        if bytecode is not None:
            self.__bytecode = bytecode

    @property
    def bytecode(self):
        return self.__bytecode

    @bytecode.setter
    def bytecode(self, bytecode):
        # Results computed from the previous bytecode are no longer valid.
        self.clear()
        self.__bytecode = bytecode

    def clear(self):
        '''
        Drop the bytecode and everything computed from it.
        '''
        self.__functions = list()
        self.__basic_blocks = dict()
        self.__instructions = dict()
        self.edges = dict()
        self.__bytecode = bytes()

    def remove_metadata(self):
        '''
        Init bytecode contains metadata that needs to be removed
        see http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode

        The cleaned bytecode is assigned through the ``bytecode`` setter,
        so previously computed results are cleared.
        '''
        code = self.bytecode
        if isinstance(code, str):
            cleaned = re.sub(self._METADATA_PATTERN, '', code)
        else:
            # bytes/bytearray need a bytes pattern and replacement;
            # a str pattern raises TypeError on Python 3.
            cleaned = re.sub(
                self._METADATA_PATTERN.encode('ascii'),
                b'',
                code
            )
        self.bytecode = cleaned

    @property
    def basic_blocks(self):
        '''
        Returns:
            ImmutableDict: pc -> BasicBlock (read-only copy)
        '''
        return ImmutableDict(self.__basic_blocks)

    @property
    def functions(self):
        '''
        Returns:
            iterator over the functions found so far
        '''
        return iter(self.__functions)

    @property
    def instructions(self):
        '''
        Returns:
            ImmutableDict: pc -> instruction (read-only copy)
        '''
        return ImmutableDict(self.__instructions)

    def compute_basic_blocks(self):
        '''
        Split instructions into BasicBlock

        Every block is registered under the pc of its first instruction
        and, once it is closed, under the pc of its last instruction.
        Args:
            self: CFG
        Returns:
            None
        '''
        # Do nothing if basic_blocks already exist
        if self.__basic_blocks:
            return

        bb = basic_block.BasicBlock()

        for instruction in disassemble_all(self.bytecode):
            self.__instructions[instruction.pc] = instruction

            if instruction.name == 'JUMPDEST':
                # JUMPDEST indicates a new BasicBlock. Set the end pc
                # of the current block, and switch to a new one.
                if bb.instructions:
                    self.__basic_blocks[bb.end.pc] = bb

                bb = basic_block.BasicBlock()

            bb.add_instruction(instruction)

            # First instruction of the block: register its start pc.
            if bb.start.pc == instruction.pc:
                self.__basic_blocks[instruction.pc] = bb

            if bb.end.name in BASIC_BLOCK_END:
                self.__basic_blocks[bb.end.pc] = bb
                bb = basic_block.BasicBlock()

    def compute_functions(self, block, is_entry_block=False):
        '''
        Follow the chain of JUMPI false branches starting at ``block``
        and record a Function for every block that is_jump_to_function
        recognizes.

        The walk stops at the first block that is not recognized (unless
        it is the entry block) or that does not end with JUMPI.
        Args:
            block (BasicBlock): block to start from
            is_entry_block (bool): keep walking even if ``block`` itself
                is not recognized
        Returns:
            None
        '''
        while True:
            function_start, function_hash = is_jump_to_function(block)

            if function_start:
                self.__functions.append(function.Function(
                    function_hash,
                    function_start,
                    self.__basic_blocks[function_start]
                ))
            elif not is_entry_block:
                return

            if not block.ends_with_jumpi():
                return

            # JUMPI has no operand: the false branch starts at the next pc.
            block = self.__basic_blocks[block.end.pc + 1]
            is_entry_block = False

    def add_function(self, func):
        '''
        Args:
            func (Function): function to register
        '''
        assert isinstance(func, function.Function)
        self.__functions.append(func)

    def compute_simple_edges(self, key=None):
        '''
        Add the edges that do not depend on a jump target: the false
        branch of a JUMPI and the fall-through into a following JUMPDEST.

        Args:
            key: key under which the edges are stored in the blocks'
                sons/fathers dicts (BasicBlock describes it as the
                function hash)
        Returns:
            None
        '''
        blocks = self.__basic_blocks
        # A block is stored under its start and its end pc; visit it once.
        seen = set()

        for bb in blocks.values():
            if id(bb) in seen:
                continue
            seen.add(id(bb))

            if bb.end.name == 'JUMPI':
                # No block after a trailing JUMPI: nothing to link.
                dst = blocks.get(bb.end.pc + 1)
                if dst is not None:
                    bb.add_son(dst, key)
                    dst.add_father(bb, key)

            # A bb can be split in the middle if it has a JUMPDEST
            # Because another edge can target the JUMPDEST
            if bb.end.name not in BASIC_BLOCK_END:
                # The last block of the bytecode has no successor.
                dst = blocks.get(bb.end.pc + 1 + bb.end.operand_size)
                if dst is not None:
                    assert dst.start.name == 'JUMPDEST'
                    bb.add_son(dst, key)
                    dst.add_father(bb, key)

def is_jump_to_function(block):
    '''
    Heuristic deciding whether a block ends with a conditional jump into
    a function.

    Recent solc versions add a first check if calldatasize < 4 and jump
    to the fallback; that case is reported with the hash -1.
    Args:
        block (BasicBlock)
    Returns:
        (value of the last PUSH, hash): the hash is -1 when CALLDATASIZE
        was seen before the JUMPI, otherwise the value of the PUSH
        before the last one. (None, None) if nothing matches.
    '''
    saw_calldatasize = False
    newest_push = None
    older_push = None

    for ins in block.instructions:
        mnemonic = ins.name

        if mnemonic == 'CALLDATASIZE':
            saw_calldatasize = True
        elif mnemonic.startswith('PUSH'):
            older_push, newest_push = newest_push, ins.operand
        elif mnemonic == 'JUMPI':
            if saw_calldatasize:
                return newest_push, -1
            if older_push:
                return newest_push, older_push

    return None, None
57 changes: 57 additions & 0 deletions evm_cfg_builder/cfg/basic_block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
class BasicBlock(object):
    '''
    Sequence of instructions plus its edges to other blocks.
    '''

    def __init__(self):
        self.instructions = []
        # sons and fathers are dict
        # The key is the function hash
        # It allows to compute the VSA only
        # On a specific function, to separate
        # the merging
        self.sons = {}
        self.fathers = {}

    def add_instruction(self, instruction):
        self.instructions.append(instruction)

    def __repr__(self):
        return '<cfg BasicBlock@{:x}-{:x}>'.format(self.start.pc, self.end.pc)

    @property
    def start(self):
        '''First instruction of the block (IndexError if the block is empty).'''
        return self.instructions[0]

    @property
    def end(self):
        '''Last instruction of the block (IndexError if the block is empty).'''
        return self.instructions[-1]

    def add_son(self, son, key):
        '''
        Record ``son`` as a successor for ``key``; duplicates are ignored.
        '''
        sons = self.sons.setdefault(key, [])
        if son not in sons:
            sons.append(son)

    def add_father(self, father, key):
        '''
        Record ``father`` as a predecessor for ``key``; duplicates are
        ignored.
        '''
        fathers = self.fathers.setdefault(key, [])
        # Membership is tested on the list for this key, not on the
        # dict itself (whose keys are never blocks).
        if father not in fathers:
            fathers.append(father)

    def ends_with_jumpi(self):
        return self.end.name == 'JUMPI'

    def ends_with_jump_or_jumpi(self):
        return self.end.name in ['JUMP', 'JUMPI']

    def true_branch(self, key):
        '''
        Returns:
            BasicBlock: the only son for ``key`` that does not start
            right after the final JUMPI
        '''
        assert self.ends_with_jumpi()

        fallthrough_pc = self.end.pc + 1
        sons = [bb for bb in self.sons[key] if bb.start.pc != fallthrough_pc]
        assert len(sons) == 1
        return sons[0]

    def false_branch(self, key):
        '''
        Returns:
            BasicBlock: the only son for ``key`` that starts right after
            the final JUMPI
        '''
        assert self.ends_with_jumpi()

        fallthrough_pc = self.end.pc + 1
        sons = [bb for bb in self.sons[key] if bb.start.pc == fallthrough_pc]
        assert len(sons) == 1
        return sons[0]
143 changes: 143 additions & 0 deletions evm_cfg_builder/cfg/function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
class Function(object):
    '''
    A function identified in the bytecode: its hash id, start address,
    entry block, basic blocks and the attributes derived from them.
    '''

    # Opcodes whose presence prevents the 'view' attribute.
    _STATE_CHANGING_OPS = frozenset([
        'CREATE',
        'CALL',
        'CALLCODE',
        'DELEGATECALL',
        'SELFDESTRUCT',
        'SSTORE',
    ])

    # Opcodes whose presence prevents the 'pure' attribute.
    _IMPURE_OPS = _STATE_CHANGING_OPS | frozenset([
        'ADDRESS',
        'BALANCE',
        'ORIGIN',
        'CALLER',
        'CALLVALUE',
        'CALLDATALOAD',
        'CALLDATASIZE',
        'CALLDATACOPY',
        'CODESIZE',
        'CODECOPY',
        'EXTCODESIZE',
        'EXTCODECOPY',
        'RETURNDATASIZE',
        'RETURNDATACOPY',
        'BLOCKHASH',
        'COINBASE',
        'TIMESTAMP',
        'NUMBER',
        'DIFFICULTY',
        'GASLIMIT',
        'LOG0', 'LOG1', 'LOG2', 'LOG3', 'LOG4',
        'STATICCALL',
        'SLOAD',
    ])

    def __init__(self, hash_id, start_addr, entry_basic_block):
        '''
        Args:
            hash_id (int): identifier of the function; also the default
                name, rendered with hex()
            start_addr: start address of the function
            entry_basic_block (BasicBlock): first block of the function
        '''
        self._hash_id = hash_id
        self._start_addr = start_addr
        self._entry = entry_basic_block
        self._name = hex(hash_id)
        self._basic_blocks = []
        self._attributes = []

    @property
    def hash_id(self):
        return self._hash_id

    @property
    def key(self):
        '''Key used to look up this function's edges in BasicBlock.sons.'''
        return self.hash_id

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, n):
        self._name = n

    @property
    def basic_blocks(self):
        '''
        Returns
            list(BasicBlock)
        '''
        return self._basic_blocks

    @basic_blocks.setter
    def basic_blocks(self, bbs):
        self._basic_blocks = bbs

    @property
    def entry(self):
        return self._entry

    @property
    def attributes(self):
        '''
        Returns
            list(str)
        '''
        return self._attributes

    def add_attributes(self, attr):
        '''Add ``attr`` unless it is already present.'''
        if attr not in self._attributes:
            self._attributes.append(attr)

    def _uses_any(self, opcodes):
        '''True if any instruction of any basic block is in ``opcodes``.'''
        return any(
            ins.name in opcodes
            for bb in self.basic_blocks
            for ins in bb.instructions
        )

    def check_payable(self):
        '''
        Tag the function 'payable' when its entry block contains no
        CALLVALUE instruction.
        '''
        if any(ins.name == 'CALLVALUE' for ins in self.entry.instructions):
            return
        self.add_attributes('payable')

    def check_view(self):
        '''
        Tag the function 'view' when none of its basic blocks contains a
        state-changing opcode.
        '''
        if not self._uses_any(self._STATE_CHANGING_OPS):
            self.add_attributes('view')

    def check_pure(self):
        '''
        Tag the function 'pure' when none of its basic blocks contains
        an opcode listed in _IMPURE_OPS.
        '''
        if not self._uses_any(self._IMPURE_OPS):
            self.add_attributes('pure')

    def __str__(self):
        attrs = ''
        if self.attributes:
            attrs = ", " + ",".join(self.attributes)
        return '{}, {} #bbs {}'.format(self.name, len(self.basic_blocks), attrs)

    def output_to_dot(self, base_filename):
        '''
        Write the function's graph to '<base_filename><name>.dot'.

        A block ending with JUMP/JUMPI that has no recorded son for this
        function is reported on stdout.
        Args:
            base_filename (str): prefix of the output file name
        '''
        with open('{}{}.dot'.format(base_filename, self.name), 'w') as f:
            f.write('digraph{\n')
            for bb in self.basic_blocks:
                label = '\n'.join(
                    '{}:{}'.format(hex(ins.pc), str(ins))
                    for ins in bb.instructions
                )

                f.write('{}[label="{}"]\n'.format(bb.start.pc, label))

                if self.key in bb.sons:
                    for son in bb.sons[self.key]:
                        f.write('{} -> {}\n'.format(bb.start.pc, son.start.pc))

                elif bb.ends_with_jump_or_jumpi():
                    print('Missing branches {}:{}'.format(self.name,
                                                          hex(bb.end.pc)))

            f.write('\n}')
52 changes: 16 additions & 36 deletions evm_cfg_builder/cfg_builder.py
Original file line number Diff line number Diff line change
@@ -4,39 +4,19 @@

from pyevmasm import disassemble_all

from .cfg import compute_instructions, find_functions, Function
from .evm_helpers import create_dicts_from_basic_blocks
from cfg import CFG
from cfg.function import Function
from .known_hashes import known_hashes
from .value_set_analysis import StackValueAnalysis

DISPATCHER_ID = -2
FALLBACK_ID = -1


def remove_metadata(bytecode):
'''
Init bytecode contains metadata that needs to be removed
see http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode
'''
return re.sub(
r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29',
'',
bytecode
)
def get_info(cfg):
cfg.add_function(Function(DISPATCHER_ID, 0, cfg.basic_blocks[0]))


def get_info(bytecode):
instructions = disassemble_all(bytecode)
basic_blocks = compute_instructions(instructions)
basic_blocks_as_dict, nodes_as_dict = create_dicts_from_basic_blocks(
basic_blocks
)

functions = find_functions(basic_blocks[0], basic_blocks_as_dict, True)
dispatcher = Function(-2, 0, basic_blocks_as_dict[0])
functions = [dispatcher] + functions

for function in functions:
for function in cfg.functions:
if function.hash_id == FALLBACK_ID:
function.name = '_fallback'
elif function.hash_id == DISPATCHER_ID:
@@ -45,23 +25,21 @@ def get_info(bytecode):
if function.hash_id in known_hashes:
function.name = known_hashes[function.hash_id]

for function in functions:
for function in cfg.functions:
vsa = StackValueAnalysis(
function.entry,
basic_blocks_as_dict,
nodes_as_dict,
cfg.basic_blocks,
cfg.instructions,
function.hash_id
)
bbs = vsa.analyze()

function.basic_blocks = [basic_blocks_as_dict[bb] for bb in bbs]
function.basic_blocks = [cfg.basic_blocks[bb] for bb in bbs]

function.check_payable()
function.check_view()
function.check_pure()

return functions

def output_to_dot(functions):
for function in functions:
function.output_to_dot('test_')
@@ -71,13 +49,15 @@ def main():

with open(filename) as f:
bytecode = f.read().replace('\n','')
bytecode = binascii.unhexlify(bytecode)
bytecode = remove_metadata(bytecode)
functions = get_info(bytecode)
cfg = CFG(binascii.unhexlify(bytecode))
cfg.remove_metadata()
cfg.compute_basic_blocks()
cfg.compute_functions(cfg.basic_blocks[0])
get_info(cfg)
print('End of analysis')
for function in functions:
for function in cfg.functions:
print(function)
output_to_dot(functions)
output_to_dot(cfg.functions)


if __name__ == '__main__':
30 changes: 0 additions & 30 deletions evm_cfg_builder/evm_helpers.py

This file was deleted.