Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Opportunistic state merging for Manticore #1351

Closed
wants to merge 16 commits into from
Closed
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
fe233dd
Skeleton implementation for state merging in Manticore
vaibhavbsharma Jan 11, 2019
f21a8d4
Implementing input, output sockets comparison for states + adding an …
vaibhavbsharma Jan 12, 2019
3abe783
Setting the random seed from the Python script instead of hard-coding…
vaibhavbsharma Jan 12, 2019
317042d
Adding initial implementation of memory comparison for state merging
vaibhavbsharma Jan 15, 2019
1f24e9c
Merge remote-tracking branch 'origin/master' into dev-state-merging
vaibhavbsharma Jan 15, 2019
ee29abf
Fixing a few issues to address comments from Yan and Dominik
vaibhavbsharma Jan 15, 2019
9dbdca1
Finishing the `is_merge_possible` predicate's implementation by finis…
vaibhavbsharma Jan 16, 2019
5de9436
Finishing off a basic implementation of opportunistic state merging b…
vaibhavbsharma Jan 17, 2019
a1b48c4
Setting the merged constraint in the merged state
vaibhavbsharma Jan 17, 2019
bc7af36
Dev state merging loadsave (#1357)
feliam Jan 17, 2019
e871f72
Integrating @feliam's changes in so that the basic_statemerging.py ex…
vaibhavbsharma Jan 18, 2019
cf5afff
Minor changes to get both Manticore CLI and script-based example via …
vaibhavbsharma Jan 18, 2019
c4c5c8a
Avoid unnecessary CPU register merging using the solver + explaining …
vaibhavbsharma Jan 18, 2019
8c01cd8
Merge remote-tracking branch 'origin/master' into dev-state-merging
vaibhavbsharma Jan 18, 2019
13e3ab6
Fixing code climate issues + adding docstrings for a Merger Plugin
vaibhavbsharma Jan 18, 2019
fa55715
Updating the description in basic_statemerging.c and .py which is the…
vaibhavbsharma Jan 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions examples/linux/basic_statemerging.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Symbolic values are read from stdin using standard libc calls.
* Program compares if two binary packed integers at the input with 0x41.
*
* Compile with :
* $ gcc -static -Os basic_statemerging.c -o basic_statemerging
*
* Analyze it with:
* $ python examples/script/basic_statemerging.py examples/linux/basic_statemerging
*
* The Merger plugin used in basic_statemerging.py will find two states with state IDs 2, 4 to be at the same program
* location (0x40060d) and merge their CPU states which should only require the value for RDI to be merged.
*
* Expected output:
* $ python /Users/vaibhav/git_repos/manticore/examples/script/basic_statemerging.py examples/linux/basic_statemerging-Os
about to load state_id = 0
loaded state_id = 0 at cpu = 0x4008e0
about to load state_id = 1
loaded state_id = 1 at cpu = 0x400604
about to load state_id = 2
Merged registers:
RDI
at PC = 0x40060d, merge succeeded for state id = 2 and 4
loaded state_id = 2 at cpu = 0x40060d
about to load state_id = 3
loaded state_id = 3 at cpu = 0x400612
*
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>

int main(int argc, char* argv[], char* envp[]){
unsigned int cmd1, cmd2;
unsigned int cmdChanged = 0;

if (read(0, &cmd1, sizeof(cmd1)) != sizeof(cmd1))
{
printf("Error reading stdin!");
exit(-1);
}
if (read(0, &cmd2, sizeof(cmd2)) != sizeof(cmd2))
{
printf("Error reading stdin!");
exit(-1);
}

if (cmd1 > 0x41)
{
cmdChanged = cmd1 - 0x42;
}
if (cmd2 < 0x41)
{
cmdChanged = cmd2 + 0x42;
}

if (cmdChanged == 0) printf("equal\n");
else printf("not equal\n");

return 0;
}


29 changes: 29 additions & 0 deletions examples/script/basic_statemerging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import sys

from manticore.core.plugin import Merger
from manticore.utils import config

from manticore.native import Manticore
'''
Demonstrates the ability to do state merging on a simple program by merging states with id 2, 4 that happen to be
at the same program location 0x40060d. This script uses the Merger plugin to apply opportunistic state merging.
'''
if __name__ == '__main__':
consts = config.get_group('executor')
consts.seed = 2
path = sys.argv[1]
m = Manticore(path, policy='random')

def will_load_state_callback(_, state_id):
print("about to load state_id = " + str(state_id))

def did_load_state_callback(_, state, state_id):
print("loaded state_id = " + str(state_id) + " at cpu = " + hex(state.cpu.PC))

m.subscribe('will_load_state', will_load_state_callback)
m.subscribe('did_load_state', did_load_state_callback)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loud thinking: do we want users to directly subscribe for events? @feliam

m.register_plugin(Merger())
m.run()

19 changes: 19 additions & 0 deletions manticore/core/executor.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
import logging
import signal

from .plugin import Plugin
from ..exceptions import ExecutorError, SolverError
from ..utils.nointerrupt import WithKeyboardInterruptAs
from ..utils.event import Eventful
@@ -340,6 +341,24 @@ def get(self):
del self._states[self._states.index(state_id)]
return state_id

@sync
def _load_state(self, state_id, delete=False):
if delete:
if state_id in self._states:
del self._states[self._states.index(state_id)]
return self._workspace.load_state(state_id, delete=delete)

@sync
def _delete_state(self, state_id):
if state_id in self._states:
del self._states[self._states.index(state_id)]
return self._workspace.rm_state(state_id)

@sync
def _replace_state(self, state_id, state):
# self._workspace.rm_state(state_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete comment?

self._workspace.save_state(state, state_id)

def list(self):
''' Returns the list of states ids currently queued '''
return list(self._states)
100 changes: 100 additions & 0 deletions manticore/core/plugin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from contextlib import contextmanager

from .state_merging import merge_constraints, is_merge_possible, merge
from ..utils.helpers import issymbolic

logger = logging.getLogger(__name__)
@@ -308,3 +309,102 @@ def will_write_register_callback(self, state, register, value):

def did_write_register_callback(self, state, register, value):
logger.info('did_write_register %r %r %r', state, register, value)


class Merger(Plugin):
def load_state(self, state_id, delete=False):
'''
Loads a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
:param state_id: state-id for state that is being loaded
:param delete: If set to True, deletes the state with state-id = `state_id`
:return: None
'''
return self.manticore._executor._load_state(state_id, delete)

def delete_state(self, state_id):
'''
Deletes a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
:param state_id: state-id for state that is being deleted
:return: None
'''
return self.manticore._executor._delete_state(state_id)

def replace_state(self, state_id, state):
'''
Replaces a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
:param state_id: state-id for state that is being replaced
:param state: State object that is replacing the existing state with `state_id`
:return: None
'''
return self.manticore._executor._replace_state(state_id, state)

def did_enqueue_state_callback(self, state_id, state):
'''
Maintains a `cpu_stateid_dict` in context that keeps maps program counter values to a list of state-id values
for states that will execute the instruction that that program counter as the next instruction
:param state_id: id for state that was just enqueued
:param state: State object that was just enqueued
:return: None
'''
# when a new state is addded to the list we save it so we do not have
# to repload all states when try to merges last PC
with self.locked_context('cpu_stateid_dict', dict) as cpu_stateid_dict:
# as we may be running in a different process we need to access this
# on a lock and over shared memory like this
state_id_list = cpu_stateid_dict.get(state.cpu.PC, list())
state_id_list.append(state_id)
cpu_stateid_dict[state.cpu.PC] = state_id_list

def will_load_state_callback(self, current_state_id):
'''
Checks if the state to be loaded (referenced by `current_state_id` can be merged with another currently enqueued
state and replaced by the merged state
:param current_state_id: state about to be loaded
:return: None
'''
# When a state is loaded for exploration lets check if we can find it a
# mate for merging
with self.locked_context('cpu_stateid_dict') as cpu_stateid_dict:
# we get the lock and get a copy of the shared context
merged_state = self.load_state(current_state_id)
states_at_pc = cpu_stateid_dict.get(merged_state.cpu.PC, [])

# lets remove ourself from the list of waiting states
assert current_state_id in states_at_pc
states_at_pc.remove(current_state_id)

#Iterate over all remaining states that are waiting for exploration
#at the same PC
merged_ids = []
for new_state_id in states_at_pc:
new_state = self.load_state(new_state_id)
(exp_merged_state, exp_new_state, merged_constraint) = merge_constraints(merged_state.constraints, new_state.constraints)
is_mergeable, reason = is_merge_possible(merged_state, new_state, merged_constraint)

if is_mergeable:
#Ok we'll merge it!
merged_state = merge(merged_state, new_state, exp_merged_state, merged_constraint)

#lets remove the vestigial links to the old state
self.delete_state(new_state_id)

merged_ids.append(new_state_id)
is_mergeable = "succeeded"
else:
is_mergeable = "failed because of " + reason
debug_string = "at PC = " + hex(merged_state.cpu.PC) + \
", merge " + is_mergeable + " for state id = " + \
str(current_state_id) + " and " + str(new_state_id)
print(debug_string)

for i in merged_ids:
states_at_pc.remove(i)

cpu_stateid_dict[merged_state.cpu.PC] = states_at_pc

#Ok so we have merged current_state_id with {merged_ids}
#And removed all merged_ids from everywhere

#UGLY we are replacing a state_id. This may be breaking caches in
#the future
self.replace_state(current_state_id, merged_state)
214 changes: 214 additions & 0 deletions manticore/core/state_merging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@

from .smtlib import solver, ConstraintSet, Operators, issymbolic, BitVec


def compare_sockets(cs, socket1, socket2):
'''
This method compares Socket objects for equality using the buffer and peer attributes.
It uses `compare_buffers` for checking buffer attributes for equality.
It calls itself for comparing peer Socket objects.
Returns True if the Socket objects are equal, false otherwise.
:param cs: ConstraintSet to be used for checking Socket.buffer for semantic equality using `solver.must_be_true()`
:param socket1: one of two Socket objects to be compared for equality against socket2
:param socket2: one of two Socket objects to be compared for equality against socket1
:return: True, if the Socket objects are found to be equal, False otherwise
'''
if socket1 is None:
return socket2 is None
if socket2 is None:
return socket1 is None
if not compare_buffers(cs, socket1.buffer, socket2.buffer):
return False
return compare_sockets(cs, socket1.peer, socket2.peer)


def compare_buffers(cs, buffer1, buffer2):
'''
This method compares the two List objects for equality using the `solver.must_be_true()` call.
:param cs: ConstraintSet to be used for checking buffer1 for semantic equality with buffer2 using `solver.must_be_true()`
:param buffer1: one of two List objects to be compared for equality against buffer2
:param buffer2: one of two List objects to be compared for equality against buffer1
:return: True, if the List objects are equal, False otherwise
'''
if len(buffer1) != len(buffer2):
return False
for b1, b2 in zip(buffer1, buffer2):
if not solver.must_be_true(cs, b1 == b2):
return False
return True


def merge_constraints(constraints1, constraints2):
'''
:param constraints1: one of two ConstraintSet objects to be merged
:param constraints2: second of two ConstraintSet objects to be merged
:return: (Expression, Expression, ConstraintSet) where the first and second Expression objects are conjunctions of
of all the constraints in constraints1 and constraints2 respectively. The ConstraintSet is an object that contains
a single constraint that is a logical OR of these two Expression objects.
'''
exp1 = constraints1.constraints[0]
for i in range(1, len(constraints1.constraints)):
exp1 = exp1 & constraints1.constraints[i]
exp2 = constraints2.constraints[0]
for i in range(1, len(constraints2.constraints)):
exp2 = exp2 & constraints2.constraints[i]
merged_constraint = ConstraintSet()
merged_constraint.add(exp1 | exp2)
return exp1, exp2, merged_constraint


def compare_byte_vals(mem1, mem2, addr, merged_constraint):
'''
Compares values in memory at address `addr`, returns True if they are semantically equal, False otherwise
:param mem1: first of two memory objects we want to use for comparison
:param mem2: second of two memory objects we want to use for comparison
:param addr: address at which bytes values are to be compared
:param merged_constraint: ConstraintSet to be used when using the call to `solver.must_be_true()`
:return: returns True if 1 byte values at address `addr` in `mem1` and `mem2` are semantically equal, False otherwise
'''
val1 = mem1.read(addr, 1)
val2 = mem2.read(addr, 1)
# since we only read a single byte value, these lists should only have one entry in them
assert len(val1) == 1 and len(val2) == 1
cond_to_check = (val1[0] == val2[0])
if not solver.must_be_true(merged_constraint, cond_to_check):
return False
else:
return True


#TODO move this comparison into an Executor API that uses an internal State API
def compare_mem(mem1, mem2, merged_constraint):
'''
This method compares the number of maps, and then their names, permissions, start, and end values.
If they all match, then it compares the concrete byte values for equality.
If those match too, it then compares _symbols attribute values for equality if the two memory objects are of
type SMemory.
:param mem1: one of two memory objects to be compared
:param mem2: second of two memory objects to be compared
:param merged_constraint: ConstraintSet object that is to be used with `solver.must_be_true()` calls to check the
memory objects for semantic equality
:return: True, if the memory objects are equal, False otherwise
'''
maps1 = sorted(list(mem1.maps))
maps2 = sorted(list(mem2.maps))
if len(maps1) != len(maps2):
return False
ret_val = None
for m1, m2 in zip(maps1, maps2):
if m1 != m2: # compares the maps' names, permissions, starts, and ends
ret_val = False
break
# Compare concrete byte values in the data in these memory maps for equality
bytes1 = m1[m1.start:m1.end]
bytes2 = m2[m2.start:m2.end]
if bytes1 != bytes2:
ret_val = False
break
checked_addrs = []
# compare symbolic byte values in memory
#hack to avoid importing SMemory because that import introduces a circular dependency on ManticoreBase
if mem1.__class__.__name__ == 'SMemory' and ret_val is not None:
for addr1, _ in mem1._symbols.items():
checked_addrs.append(addr1)
if not compare_byte_vals(mem1, mem2, addr1, merged_constraint):
ret_val = False
break
#hack to avoid importing SMemory because that import introduces a circular dependency on ManticoreBase
if mem2.__class__.__name__ == 'SMemory' and ret_val is not None:
for addr2, _ in mem2._symbols.items():
if addr2 not in checked_addrs:
if not compare_byte_vals(mem1, mem2, addr2, merged_constraint):
ret_val = False
break
if ret_val is not None:
return ret_val
else:
return True


def is_merge_possible(state1, state2, merged_constraint):
'''
Checks if a merge is possible by checking Input, Output sockets, symbolic_files, syscall_trace, and memory
for equality.
:param state1: one of two possible states we want to check for mergeability
:param state2: second of two possible states we want to check for mergeability
:param merged_constraint: ConstraintSet of merged constraints for state1 and state2
:return: True, if state1 and state2 can be merged, False if otherwise
'''
platform1 = state1.platform
platform2 = state2.platform

ret_val = None

# compare input and output sockets of the states
if not compare_sockets(merged_constraint, platform1.input, platform2.input) or \
not compare_sockets(merged_constraint, platform1.output, platform2.output):
ret_val = False, "inequivalent socket operations"

# compare symbolic files opened by the two states
if ret_val is None and platform1.symbolic_files != platform2.symbolic_files:
ret_val = False, "inequivalent symbolic files"

# compare system call traces of the two states
if ret_val is None and len(platform1.syscall_trace) != len(platform2.syscall_trace):
ret_val = False, "inequivalent syscall trace lengths"
if ret_val is None:
for i, (name1, fd1, data1) in enumerate(platform1.syscall_trace):
(name2, fd2, data2) = platform2.syscall_trace[i]
if not (name1 == name2 and fd1 == fd2 and compare_buffers(merged_constraint, data1, data2)):
ret_val = False, "inequivalent syscall traces"
break

# compare memory of the two states
if ret_val is None and not compare_mem(state1.mem, state2.mem, merged_constraint):
ret_val = False, "inequivalent memory"
if ret_val is not None:
return ret_val
else:
return True, None


def merge_cpu(cpu1, cpu2, state, exp1, merged_constraint):
'''
Merge CPU objects into the state.CPU
:param cpu1: one of two CPU objects that we wish to merge
:param cpu2: second of two CPU objects that we wish to merge
:param state: the state whose CPU attribute we will be updating
:param exp1: the expression that if satisfiable will cause the CPU registers to take corresponding values from
`cpu1`, else they will take corresponding values from `cpu2`
:param merged_constraint: ConstraintSet under which we would want inequality between CPU register values to be
satisfiable as checked using `solver.must_be_true()`
:return: List of registers that were merged
'''
merged_regs = []
for reg in cpu1.canonical_registers:
val1 = cpu1.read_register(reg)
val2 = cpu2.read_register(reg)
if isinstance(val1, BitVec) and isinstance(val2, BitVec):
assert val1.size == val2.size
if issymbolic(val1) or issymbolic(val2) or val1 != val2:
if solver.must_be_true(merged_constraint, val1 != val2):
merged_regs.append(reg)
if cpu1.regfile.sizeof(reg) == 1:
state.cpu.write_register(reg, Operators.ITE(exp1, val1, val2))
else:
state.cpu.write_register(reg, Operators.ITEBV(cpu1.regfile.sizeof(reg), exp1, val1, val2))
return merged_regs


def merge(state1, state2, exp1, merged_constraint):
'''
Merge state1 and state2 into a single state
:param state1:
:param state2:
:param exp1:
:param merged_constraint:
:return: the state that is the result of the merging of `state1` and `state2`
'''
merged_state = state1
merged_reg_list = merge_cpu(state1.cpu, state2.cpu, merged_state, exp1, merged_constraint)
print("Merged registers: ")
print(*merged_reg_list, sep=',')
merged_state.constraints = merged_constraint
return merged_state