Integrating @feliam's changes in so that the basic_statemerging.py ex…

…ample is now using the Merge plugin 1. Using Merge plugin and the newly added APIs in Executor to load, delete, replace states 2. Completing the memory objects comparison by comparing both memory object's symbolic writes instead of only comparing the first memory object against the second 3. Adding documentation for all of the newly introduced methods in state_merging.py
trailofbits · vaibhavbsharma · Jan 11, 2019 · Jan 12, 2019 · Jan 12, 2019 · Jan 15, 2019
commit e871f7296718e8717123be33cae5ef039fa53ab2
@@ -5,7 +5,7 @@
 from manticore.utils import config
 
 from manticore.native import Manticore
-
+from manticore.core.executor import Merger
 '''
 Demonstrates the ability to set a basic hook on a specific program counter and
 the ability to read from memory.
@@ -26,6 +26,6 @@ def did_load_state_callback(_, state, state_id):
 
     m.subscribe('will_load_state', will_load_state_callback)
     m.subscribe('did_load_state', did_load_state_callback)
-
+    m.register_plugin(Merger())
     m.run()
 
@@ -4,6 +4,8 @@
 import logging
 import signal
 
+from .state_merging import merge_constraints, is_merge_possible, merge
+from .plugin import Plugin
 from ..exceptions import ExecutorError, SolverError
 from ..utils.nointerrupt import WithKeyboardInterruptAs
 from ..utils.event import Eventful
@@ -15,8 +17,6 @@
 from multiprocessing.managers import SyncManager
 from contextlib import contextmanager
 
-from core.state_merging import is_merge_possible, merge, merge_constraints
-
 # This is the single global manager that will handle all shared memory among workers
 
 consts = config.get_group('executor')
@@ -164,29 +164,30 @@ def choice(self, state_ids):
         else:
             return None
 
-def Merger(Plugin):
+
+
+class Merger(Plugin):
     def load_state(self, state_id, delete=False):
         return self.manticore._executor._load_state(state_id, delete)
 
     def delete_state(self, state_id):
         return self.manticore._executor._delete_state(state_id)
-        
+
     def replace_state(self, state_id, state):
         return self.manticore._executor._replace_state(state_id, state)
 
-    def did_enqueue_state_callback(self, state, state_id):
-        # when a new state is addded to the list we save it so we do not have 
+    def did_enqueue_state_callback(self, state_id, state):
+        # when a new state is addded to the list we save it so we do not have
         # to repload all states when try to merges last PC
-        with self.locked_context('cpu_stateid_dict') as cpu_stateid_dict:
-            # as we may be riunning in a different process we need to access this
+        with self.locked_context('cpu_stateid_dict', dict) as cpu_stateid_dict:
+            # as we may be running in a different process we need to access this
             # on a lock and over shared memory like this
             l = cpu_stateid_dict.get(state.cpu.PC, list())
             l.append(state_id)
             cpu_stateid_dict[state.cpu.PC] = l
 
-
-    def will_load_state(self, current_state_id):
-        # When a state is loaded for exploration lets check if we can find it a 
+    def will_load_state_callback(self, current_state_id):
+        # When a state is loaded for exploration lets check if we can find it a
         # mate for merging
         with self.locked_context('cpu_stateid_dict') as cpu_stateid_dict:
             # we get the lock and get a copy of the shared context
@@ -195,7 +196,7 @@ def will_load_state(self, current_state_id):
 
             # lets remove ourself from the list of waiting states
             assert current_state_id in states_at_pc
-            del states_at_pc[current_state_id]
+            states_at_pc.remove(current_state_id)
 
             #Iterate over all remaining states that are waiting for exploration
             #at the same PC
@@ -204,33 +205,32 @@ def will_load_state(self, current_state_id):
                 new_state = self.load_state(new_state_id)
                 (exp_merged_state, exp_new_state, merged_constraint) = merge_constraints(merged_state.constraints, new_state.constraints)
                 is_mergeable, reason = is_merge_possible(merged_state, new_state, merged_constraint)
-                
+
                 if is_mergeable:
                     #Ok we'll merge it!
-                    merged_state = merge(merged_state, new_state, exp_merged_state, exp_new_state, merged_constraint)
+                    merged_state = merge(merged_state, new_state, exp_merged_state, merged_constraint)
 
                     #lets remove the vestigial links to the old state
                     self.delete_state(new_state_id)
-                    self._states.remove(new_state_id) # we are locked under locked context
 
                     merged_ids.append(new_state_id)
                     is_mergeable = "succeeded"
                 else:
                     is_mergeable = "failed because of " + reason
-                debug_string = "at PC = " + hex(current_state.cpu.PC) + \
+                debug_string = "at PC = " + hex(merged_state.cpu.PC) + \
                                ", merge " + is_mergeable + " for state id = " + \
                                str(current_state_id) + " and " + str(new_state_id)
                 print(debug_string)
 
             for i in merged_ids:
-                 states_at_pc.remove(i)
+                states_at_pc.remove(i)
 
-            cpu_stateid_dict[current_state.cpu.PC] = states_at_pc
+            cpu_stateid_dict[merged_state.cpu.PC] = states_at_pc
 
             #Ok so we have merged current_state_id with {merged_ids}
             #And removed all merged_ids from everywhere
 
-            #UGLY we are replacing a state_id. This may be breaking caches in 
+            #UGLY we are replacing a state_id. This may be breaking caches in
             #the future
             self.replace_state(current_state_id, merged_state)
 
@@ -414,24 +414,20 @@ def get(self):
 
     @sync
     def _load_state(self, state_id, delete=False):
-        if state_id not in self._states:
-            raise Exception("State does not exist")
         if delete:
-            del self._states[self._states.index(state_id)]
-        return self._workspace.load_state(new_state_id, delete=delete)
+            if state_id in self._states:
+                del self._states[self._states.index(state_id)]
+        return self._workspace.load_state(state_id, delete=delete)
 
     @sync
     def _delete_state(self, state_id):
-        if state_id not in self._states:
-            raise Exception("State does not exist")
-        del self._states[self._states.index(state_id)]
-        return self._workspace.rm(state_id)
+        if state_id in self._states:
+            del self._states[self._states.index(state_id)]
+        return self._workspace.rm_state(state_id)
 
     @sync
     def _replace_state(self, state_id, state):
-        if state_id not in self._states:
-            raise Exception("State id does not exist")
-        self._workspace.rm(state_id)
+        # self._workspace.rm_state(state_id)
         self._workspace.save_state(state, state_id)
 
     def list(self):

@@ -1,7 +1,19 @@
-from manticore.core.smtlib import solver, ConstraintSet, Operators, issymbolic, BitVec
+from ..native.memory import SMemory
+
+from .smtlib import solver, ConstraintSet, Operators, issymbolic, BitVec
 
 
 def compare_sockets(cs, socket1, socket2):
+    '''
+    This method compares Socket objects for equality using the buffer and peer attributes.
+    It uses `compare_buffers` for checking buffer attributes for equality.
+    It calls itself for comparing peer Socket objects.
+    Returns True if the Socket objects are equal, false otherwise.
+    :param cs: ConstraintSet to be used for checking Socket.buffer for semantic equality using `solver.must_be_true()`
+    :param socket1: one of two Socket objects to be compared for equality against socket2
+    :param socket2: one of two Socket objects to be compared for equality against socket1
+    :return: True, if the Socket objects are found to be equal, False otherwise
+    '''
     if socket1 is None:
         return socket2 is None
     if socket2 is None:
@@ -12,6 +24,13 @@ def compare_sockets(cs, socket1, socket2):
 
 
 def compare_buffers(cs, buffer1, buffer2):
+    '''
+    This method compares the two List objects for equality using the `solver.must_be_true()` call.
+    :param cs: ConstraintSet to be used for checking buffer1 for semantic equality with buffer2 using `solver.must_be_true()`
+    :param buffer1: one of two List objects to be compared for equality against buffer2
+    :param buffer2: one of two List objects to be compared for equality against buffer1
+    :return: True, if the List objects are equal, False otherwise
+    '''
     if len(buffer1) != len(buffer2):
         return False
     for b1, b2 in zip(buffer1, buffer2):
@@ -21,6 +40,13 @@ def compare_buffers(cs, buffer1, buffer2):
 
 
 def merge_constraints(constraints1, constraints2):
+    '''
+    :param constraints1: one of two ConstraintSet objects to be merged
+    :param constraints2: second of two ConstraintSet objects to be merged
+    :return: (Expression, Expression, ConstraintSet) where the first and second Expression objects are conjunctions of
+    of all the constraints in constraints1 and constraints2 respectively. The ConstraintSet is an object that contains
+    a single constraint that is a logical OR of these two Expression objects.
+    '''
     exp1 = constraints1.constraints[0]
     for i in range(1, len(constraints1.constraints)):
         exp1 = exp1 & constraints1.constraints[i]
@@ -32,7 +58,38 @@ def merge_constraints(constraints1, constraints2):
     return exp1, exp2, merged_constraint
 
 
+def compare_byte_vals(mem1, mem2, addr, merged_constraint):
+    '''
+    Compares values in memory at address `addr`, returns True if they are semantically equal, False otherwise
+    :param mem1: first of two memory objects we want to use for comparison
+    :param mem2: second of two memory objects we want to use for comparison
+    :param addr: address at which bytes values are to be compared
+    :param merged_constraint: ConstraintSet to be used when using the call to `solver.must_be_true()`
+    :return: returns True if 1 byte values at address `addr` in `mem1` and `mem2` are semantically equal, False otherwise
+    '''
+    val1 = mem1.read(addr, 1)
+    val2 = mem2.read(addr, 1)
+    # since we only read a single byte value, these lists should only have one entry in them
+    assert len(val1) == 1 and len(val2) == 1
+    cond_to_check = (val1[0] == val2[0])
+    if not solver.must_be_true(merged_constraint, cond_to_check):
+        return False
+    else:
+        return True
+
+
 def compare_mem(mem1, mem2, merged_constraint):
+    '''
+    This method compares the number of maps, and then their names, permissions, start, and end values.
+    If they all match, then it compares the concrete byte values for equality.
+    If those match too, it then compares _symbols attribute values for equality if the two memory objects are of
+    type SMemory.
+    :param mem1: one of two memory objects to be compared
+    :param mem2: second of two memory objects to be compared
+    :param merged_constraint: ConstraintSet object that is to be used with `solver.must_be_true()` calls to check the
+    memory objects for semantic equality
+    :return: True, if the memory objects are equal, False otherwise
+    '''
     maps1 = sorted(list(mem1.maps))
     maps2 = sorted(list(mem2.maps))
     if len(maps1) != len(maps2):
@@ -45,19 +102,30 @@ def compare_mem(mem1, mem2, merged_constraint):
         bytes2 = m2[m2.start:m2.end]
         if bytes1 != bytes2:
             return False
+    checked_addrs = []
     # compare symbolic byte values in memory
-    for addr1, _ in mem1._symbols.items():
-        val1 = mem1.read(addr1, 1)
-        val2 = mem2.read(addr1, 1)
-        # since we only read a single byte value, these lists should only have one entry in them
-        assert len(val1) == 1 and len(val2) == 1
-        cond_to_check = (val1[0] == val2[0])
-        if not solver.must_be_true(merged_constraint, cond_to_check):
-            return False
+    if isinstance(mem1, SMemory):
+        for addr1, _ in mem1._symbols.items():
+            checked_addrs.append(addr1)
+            if not compare_byte_vals(mem1, mem2, addr1, merged_constraint):
+                return False
+    if isinstance(mem2, SMemory):
+        for addr2, _ in mem2._symbols.items():
+            if addr2 not in checked_addrs:
+                if not compare_byte_vals(mem1, mem2, addr2, merged_constraint):
+                    return False
     return True
 
 
 def is_merge_possible(state1, state2, merged_constraint):
+    '''
+    Checks if a merge is possible by checking Input, Output sockets, symbolic_files, syscall_trace, and memory
+    for equality.
+    :param state1: one of two possible states we want to check for mergeability
+    :param state2: second of two possible states we want to check for mergeability
+    :param merged_constraint: ConstraintSet of merged constraints for state1 and state2
+    :return: True, if state1 and state2 can be merged, False if otherwise
+    '''
     platform1 = state1.platform
     platform2 = state2.platform
 
@@ -84,8 +152,16 @@ def is_merge_possible(state1, state2, merged_constraint):
     return True, None
 
 
-#TODO
-def merge_cpu(cpu1, cpu2, state, exp1, exp2):
+def merge_cpu(cpu1, cpu2, state, exp1):
+    '''
+    Merge CPU objects into the state.CPU
+    :param cpu1: one of two CPU objects that we wish to merge
+    :param cpu2: second of two CPU objects that we wish to merge
+    :param state: the state whose CPU attribute we will be updating
+    :param exp1: the expression that if satisfiable will cause the CPU registers to take corresponding values from
+    `cpu1`, else they will take corresponding values from `cpu2`
+    :return: No return value
+    '''
     for reg in cpu1.canonical_registers:
         val1 = cpu1.read_register(reg)
         val2 = cpu2.read_register(reg)
@@ -98,10 +174,16 @@ def merge_cpu(cpu1, cpu2, state, exp1, exp2):
                 state.cpu.write_register(reg, Operators.ITEBV(cpu1.regfile.sizeof(reg), exp1, val1, val2))
 
 
-def merge(state1, state2, exp1, exp2, merged_constraint):
+def merge(state1, state2, exp1, merged_constraint):
+    '''
+    Merge state1 and state2 into a single state
+    :param state1:
+    :param state2:
+    :param exp1:
+    :param merged_constraint:
+    :return: the state that is the result of the merging of `state1` and `state2`
+    '''
     merged_state = state1
-    merge_cpu(state1.cpu, state2.cpu, merged_state, exp1, exp2)
+    merge_cpu(state1.cpu, state2.cpu, merged_state, exp1)
     merged_state.constraints = merged_constraint
     return merged_state
-
-