-
Notifications
You must be signed in to change notification settings - Fork 0
/
program.py
315 lines (259 loc) · 10.3 KB
/
program.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import logging
import sys
from loader.pe.pe_module import PEModule
from enum import Enum
from loader.elf.elf_module import ELFModule
from loader.pe.abstract_coff_module import AbstractCOFFModule
from loader.win32_stub_library import Win32StubLibrary
class TargetOS(Enum):
    """Operating system a loaded program targets."""

    # PE images set this when loaded
    WINDOWS = 1
    LINUX = 2
    # default before any module has been loaded
    UNKNOWN = 3
def create_program(arch):
    """ Initialize program with architecture object
    Input -
        arch (Architecture obj) - physical architecture program runs on
                                  encapsulates all specifics of analysis
    Output -
        program_instance (Program obj) - Project interface handle
    """
    program_instance = Program(arch)
    # Publish the new program in the class-level slot
    # Program.program_instance, which Program.getProgram() returns.
    Program.program_instance = program_instance
    return program_instance
class Program:
    """Container for the state of one analyzed program.

    Holds the architecture, the loaded modules, the exported and unresolved
    symbol tables, the label-to-statement map and the entry point.  See
    ``__init__`` for the full list of attributes.
    """

    # Class-level slot returned by getProgram(); no method of this class
    # assigns it.
    program_instance = None

    #############################################################
    # Accessors
    #############################################################
    def getProgram(self):
        """Return the value stored in the ``program_instance`` slot."""
        return self.program_instance

    def get_statement_couunt(self):
        """Return the number of entries in the statement map."""
        return len(self.statement_map)

    def get_assembly_couunt(self):
        """Return the number of entries in the assembly map."""
        return len(self.assembly_map)

    # Correctly spelled aliases; the misspelled names are kept for callers.
    get_statement_count = get_statement_couunt
    get_assembly_count = get_assembly_couunt

    def get_harness(self):
        """Return the harness set by install_harness (None until then)."""
        return self.harness

    def get_target_os(self):
        """Return the TargetOS value recorded for this program."""
        return self.target_os

    def get_arch(self):
        """Return the architecture object passed to the constructor."""
        return self.arch

    def get_cfg(self):
        """Return the ``cfg`` attribute (None unless assigned externally)."""
        return self.cfg

    def get_start(self):
        """Return the entry-point label set by set_start (None until then)."""
        return self.start

    def get_symbols(self):
        """Return a view of the names in the exported symbol table."""
        return self.exported_symbols.keys()

    def get_main_module(self):
        """ Returns handle to main module of program
        Input -
            None
        Output -
            main_module (ExecutableImage obj) - handle to module with entry point
        """
        return self.main_module

    #############################################################
    # Core Logic
    #############################################################
    def __init__(self, arch):
        """ Instance method that initializes a program object, constructor
        Input -
            arch (Architecture obj) - physical architecture program runs on
                                      encapsulates all specifics of analysis
        Output -
            None
        """
        self.arch = arch
        # stays UNKNOWN until load_module recognises a format
        self.target_os = TargetOS.UNKNOWN
        # list of executable images
        self.modules = []
        # Attributes assigned later by other methods; initialised here so
        # that the accessors return None instead of raising AttributeError.
        self.main_module = None
        self.start = None
        self.harness = None
        self.cfg = None
        self.stub_library = None
        # assembly map is sorted dictionary, TODO::switch to sorted dict
        self.assembly_map = {}
        # map from rtl label to rtl statement
        self.statement_map = {}
        # map from text to export symbol obj
        self.exported_symbols = {}
        # set of unresolved symbols objects
        self.unresolved_symbols = set()
        # set of rtl_labels
        self.unresolved_branches = set()

    @staticmethod
    def remove_decoration(s):
        """ processes out @ portion of symbol name
        Input -
            s (String) - input symbol name to process
        Output -
            s (String) - s without one leading '@' or '_' and without
                         everything from the first remaining '@' onwards
        """
        if s and s[0] in ('@', '_'):
            s = s[1:]
        # find() returns -1 when there is no '@'; index() would raise.
        at_pos = s.find('@')
        if at_pos >= 0:
            s = s[:at_pos]
        return s

    def resolve_symbols(self):
        """ Resolves symbols between loaded modules

        Each unresolved symbol whose undecorated name is present in the
        exported symbol table is resolved to that export's address and then
        dropped from the unresolved set, so that install_stubs only sees
        symbols that are still open.
        Input -
            None
        Output -
            None
        """
        # Iterate over a snapshot because the set is modified in the loop.
        for unresolved in list(self.unresolved_symbols):
            name = unresolved.get_name()
            symbol = self.exported_symbols.get(self.remove_decoration(name))
            if symbol is not None:
                logging.debug('Resolving symbol %s', name)
                unresolved.resolve(symbol.get_addr())
                self.unresolved_symbols.discard(unresolved)

    def load_module(self, module_file):
        """ Loads an executable image and merges its symbols into the program
        Input -
            module_file (file descriptor) - file to be loaded
        Output -
            module (executable image object) - the loaded module
        Raises -
            Exception - if the address range of the new module overlaps
                        the range of an already loaded module
        """
        # Only PE images are handled so far; a parse failure propagates to
        # the caller.
        # TODO::Define Windows object file class
        # TODO::Define object file class (ObjectFile) as a fallback
        # TODO::Define raw module class (RawModule) as a fallback
        module = PEModule(module_file, self.get_arch())
        self.target_os = TargetOS.WINDOWS

        for existing_module in self.modules:
            if existing_module.get_max_addr() >= module.get_min_addr() \
                    and existing_module.get_min_addr() < module.get_max_addr():
                raise Exception("Virtual Address of Modules Overlap!")

        self.modules.append(module)
        # update() mutates the set in place; union() would return a new set
        # and leave this one unchanged.
        self.unresolved_symbols.update(module.get_unresolved_symbols())
        for symbol in module.get_export_symbols():
            self.exported_symbols[self.remove_decoration(symbol.get_name())] = symbol
        self.resolve_symbols()
        return module

    def is_stub(self, abs_addr):
        """ Tells whether an address is at or above the stub base address
        TODO::Understand what this function is doing and whether this is instance or not
        """
        # NOTE(review): stub_provider is not defined or imported in the
        # visible part of this file - confirm where STUB_BASE lives.
        return abs_addr.get_value() >= stub_provider.STUB_BASE

    def is_import(self, abs_addr):
        """ Tells whether an address is a stub or lies in a module's import area
        Input -
            abs_addr (AbsoluteAddress Obj) - a virtual address
        Output -
            (bool) - True for stub addresses and for addresses the owning
                     module reports as import area; False if no module
                     contains the address
        """
        if self.is_stub(abs_addr):
            return True
        module = self.get_module(abs_addr)
        if module is None:
            return False
        return module.is_import_area(abs_addr)

    def install_stubs(self):
        """ Creates the stub library matching the main module and resolves
        the remaining unresolved symbols against it

        Exits the process via sys.exit() when the main module is neither a
        COFF nor an ELF module.
        Input -
            None
        Output -
            None
        """
        logging.debug("main module type: %s", type(self.main_module))
        if isinstance(self.main_module, AbstractCOFFModule):
            self.stub_library = Win32StubLibrary(self.arch)
        elif isinstance(self.main_module, ELFModule):
            # NOTE(review): LinuxStubLibrary is not imported in the visible
            # part of this file - confirm its module path.
            self.stub_library = LinuxStubLibrary(self.arch)
        else:
            logging.error("undefined arch for stubs")
            sys.exit()

        # Iterate over a snapshot so resolved symbols can be removed from
        # the set inside the loop.
        for unresolved in list(self.unresolved_symbols):
            address = self.stub_library.resolve_symbol(
                unresolved.get_from_library(), unresolved.get_name())
            if address is not None:
                unresolved.resolve(address)
                self.unresolved_symbols.remove(unresolved)

        if self.unresolved_symbols:
            logging.warning("Unresolved Symbols Remaining: %s", self.unresolved_symbols)

    def install_harness(self, harness):
        """ Install a harness that sets up the symbolic environment before calling main
            and provides return point with termination statement
        Input -
            harness (harness obj) - the harness object to install
        Output -
            None
        """
        self.harness = harness
        harness.install(self)

    def set_start(self, label):
        """ Set the program entry point to the given label
        Input -
            label (RTLLabel obj) - label of the entry point
        Output -
            None
        """
        self.start = label

    def set_entry_address(self, entry_address):
        """ Set the entry point to the given address
        Input -
            entry_address (absolute address obj) - the new entry address
        Output -
            None
        """
        # TODO:: Define RTLLabel
        # NOTE(review): RTLLabel is not defined or imported in the visible
        # part of this file.
        self.set_start(RTLLabel(entry_address))

    def get_module(self, abs_addr):
        """ Get the module that contains the specified virtual address at runtime
        Input -
            abs_addr (AbsoluteAddress Obj) - a virtual address
        Output -
            module (ExecutableImage obj) - the module to which the virtual address belongs,
                                           or None if no loaded module contains it
        """
        for module in self.modules:
            if module.get_file_pointer(abs_addr) >= 0:
                return module
        return None

    def get_statment(self, label):
        """ Returns the statement stored for a label, translating the
        instruction at the label's address first if the label is unknown
        Input -
            label (RTLLabel obj) - label of the requested statement
        Output -
            statement - the entry of the statement map for label
        Raises -
            Exception - if the debug option is set and no instruction was
                        found, or if the label is still missing from the
                        statement map after translation
        """
        # NOTE(review): get_instruction, put_statement, options, RTLHalt,
        # RTLSkip and AbsoluteAddress are not defined or imported in the
        # visible part of this file - confirm where they come from.
        if label not in self.statement_map:
            address = label.get_addr()
            instr = get_instruction(address)
            if instr is None:
                # No instruction available: store a halt statement instead.
                # TODO:: Define RTLHalt
                halt = RTLHalt()
                halt.set_label(label)
                put_statement(halt)
                logging.error("ERROR: Replacing unknown instruction with HALT.")
                if options.debug.get_value():
                    raise Exception("Disassembly failed at ", address)
            else:
                try:
                    seq = self.arch.get_rtl_equivalent(address, instr)
                    for rtlstatement in seq:
                        put_statement(rtlstatement)
                except Exception:
                    logging.error("Error during translation of instruction to IL")
                    # Translation failed: store a skip statement that
                    # points at the following address.
                    # TODO:: Define RTLSkip
                    skip = RTLSkip()
                    skip.set_label(label)
                    # NOTE(review): set_net_label may be a typo for
                    # set_next_label - confirm against RTLSkip.
                    skip.set_net_label(RTLLabel(AbsoluteAddress(address.get_value() + 1)))
                    put_statement(skip)
            if label not in self.statement_map:
                raise Exception("Disassembly did not produce label:", label)
        return self.statement_map[label]

    # Correctly spelled alias; the misspelled name is kept for callers.
    get_statement = get_statment

    def contains_label(self, label):
        """Tell whether the statement map has an entry for ``label``."""
        return label in self.statement_map

    def load_main_module(self, module_file):
        """ loads module containing main function, called last for symbol resolution
        Input -
            module_file (file descriptor) - file to be loaded
        Output -
            module (executable image object) - executable image obj of loaded module
        """
        module = self.load_module(module_file)
        self.main_module = module
        # Call the setter; assigning to a local of the same name would leave
        # the entry point unset.
        self.set_entry_address(module.get_entry_point())
        self.install_stubs()
        return module
"""
TODO:: Implement the following functions
pcout instruction
get instruction string
get symbol for
get address for symbol
symbol finder
get unresolved branches
set unresolved branches
get assembly map
get used variables
set cfa
count indirect branches
"""