Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate the API between cached/uncached methods #5

Merged
merged 3 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions bin/binexporter
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,28 @@ import click

from binexport import ProgramBinExport

BINARY_FORMAT = {'application/x-dosexec',
'application/x-sharedlib',
'application/x-mach-binary',
'application/x-executable',
'application/x-pie-executable'}
BINARY_FORMAT = {
"application/x-dosexec",
"application/x-sharedlib",
"application/x-mach-binary",
"application/x-executable",
"application/x-pie-executable",
}

EXTENSIONS_WHITELIST = {'application/octet-stream': ['.dex']}
EXTENSIONS_WHITELIST = {"application/octet-stream": [".dex"]}

CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'],
max_content_width=300)
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=300)


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('-i', '--ida-path', type=click.Path(exists=True), default=None, help="IDA Pro installation directory")
@click.option('-v', '--verbose', count=True, help="To activate or not the verbosity")
@click.option(
"-i",
"--ida-path",
type=click.Path(exists=True),
default=None,
help="IDA Pro installation directory",
)
@click.option("-v", "--verbose", count=True, help="To activate or not the verbosity")
@click.argument("input_file", type=click.Path(exists=True), metavar="<binary file>")
def main(ida_path: str, input_file: str, verbose: bool) -> None:
"""
Expand All @@ -38,14 +45,18 @@ def main(ida_path: str, input_file: str, verbose: bool) -> None:
:return: None
"""

logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG if verbose else logging.INFO)
logging.basicConfig(
format="[%(levelname)s] %(message)s", level=logging.DEBUG if verbose else logging.INFO
)

if ida_path:
os.environ['IDA_PATH'] = pathlib.Path(ida_path).absolute().as_posix()
os.environ["IDA_PATH"] = pathlib.Path(ida_path).absolute().as_posix()

mime_type = magic.from_file(input_file, mime=True)
input_file = pathlib.Path(input_file)
if mime_type not in BINARY_FORMAT and input_file.suffix not in EXTENSIONS_WHITELIST.get(mime_type, []):
if mime_type not in BINARY_FORMAT and input_file.suffix not in EXTENSIONS_WHITELIST.get(
mime_type, []
):
logging.error("the file is not an executable file")
exit(1)

Expand All @@ -56,5 +67,5 @@ def main(ida_path: str, input_file: str, verbose: bool) -> None:
exit(1)


if __name__ == '__main__':
if __name__ == "__main__":
main()
77 changes: 63 additions & 14 deletions binexport/basic_block.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import weakref
from collections import OrderedDict
from typing import Optional
from functools import cached_property

from binexport.utils import instruction_index_range, get_instruction_address
from binexport.instruction import InstructionBinExport
from binexport.types import Addr


class BasicBlockBinExport(OrderedDict):
class BasicBlockBinExport:
"""
Basic block.
It inherits OrderdDict, so one can use any dictionary
methods to access instructions.
Basic block class.
"""

def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.ref["FunctionBinExport"], pb_bb: "BinExport2.BasicBlock"):
def __init__(
self,
program: weakref.ref["ProgramBinExport"],
function: weakref.ref["FunctionBinExport"],
pb_bb: "BinExport2.BasicBlock",
):
"""
:param program: Weak reference to the program
:param function: Weak reference to the function
Expand All @@ -24,8 +26,10 @@ def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.r
super(BasicBlockBinExport, self).__init__()

self._program = program
self.addr: Addr = None #: basic bloc address
self._function = function
self.pb_bb = pb_bb

self.addr: Addr = None #: basic bloc address
self.bytes = b"" #: bytes of the basic block

# Ranges are in fact the true basic blocks but BinExport
Expand All @@ -34,15 +38,11 @@ def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.r
# might be merged into a single basic block so the edge gets lost.
for rng in pb_bb.instruction_index:
for idx in instruction_index_range(rng):
pb_inst = self.program.proto.instruction[idx]
inst_addr = get_instruction_address(self.program.proto, idx)
self.bytes += self.program.proto.instruction[idx].raw_bytes

# The first instruction determines the basic block address
if self.addr is None:
self.addr = inst_addr

self.bytes += pb_inst.raw_bytes
self[inst_addr] = InstructionBinExport(self._program, function, inst_addr, idx)
self.addr = get_instruction_address(self.program.proto, idx)

def __hash__(self) -> int:
"""
Expand All @@ -66,3 +66,52 @@ def program(self) -> "ProgramBinExport":
:return: object :py:class:`ProgramBinExport`, program associated to the basic block
"""
return self._program()

@property
def function(self) -> "FunctionBinExport":
    """
    Dereference the weak reference kept on the owning function.

    :return: object :py:class:`FunctionBinExport`, function associated to the basic block
             (whatever calling the stored weak reference yields)
    """
    function_ref = self._function
    return function_ref()

@property
def uncached_instructions(self) -> dict[Addr, InstructionBinExport]:
    """
    Build a brand new mapping of the instructions of this basic block, keyed by
    their address.
    Nothing is cached here: every access walks the protobuf instruction ranges
    again and instantiates new instruction objects. Use
    `BasicBlockBinExport.instructions` to obtain a cached mapping instead.

    :return: dictionary of addresses to instructions
    """

    # A BinExport basic block is stored as one or more instruction index ranges.
    # The ranges are the true basic blocks: BinExport does not share that
    # semantic and may merge several of them into one, e.g.
    # BB_1 -- unconditional_jmp --> BB_2 can end up as a single basic block
    # (the edge gets lost). Flatten the ranges into a single stream of indices.
    instruction_indices = (
        idx
        for rng in self.pb_bb.instruction_index
        for idx in instruction_index_range(rng)
    )

    # The one-element tuple binds the address once per instruction so that it
    # can be used both as the key and as a constructor argument.
    return {
        inst_addr: InstructionBinExport(self._program, self._function, inst_addr, idx)
        for idx in instruction_indices
        for inst_addr in (get_instruction_address(self.program.proto, idx),)
    }

@cached_property
def instructions(self) -> dict[Addr, InstructionBinExport]:
    """
    Cached mapping of the instructions of this basic block, keyed by their address.
    The mapping is built on first access through `uncached_instructions` and then
    stored on the instance; delete the attribute to erase the cache.

    :return: dictionary of addresses to instructions
    """

    instructions_by_addr = self.uncached_instructions
    return instructions_by_addr
32 changes: 18 additions & 14 deletions binexport/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@ class ExpressionBinExport:
64: "zmmword",
}

def __init__(self,
program: "ProgramBinExport",
function: "FunctionBinExport",
instruction: "InstructionBinExport",
exp_idx: int,
parent: ExpressionBinExport | None = None):
def __init__(
self,
program: "ProgramBinExport",
function: "FunctionBinExport",
instruction: "InstructionBinExport",
exp_idx: int,
parent: ExpressionBinExport | None = None,
):
"""
:param program: reference to program
:param function: reference to function
Expand Down Expand Up @@ -101,10 +103,12 @@ def depth(self) -> int:
return 0
return self.parent.depth + 1

def _parse_protobuf(self,
program: "ProgramBinExport",
function: "FunctionBinExport",
instruction: "InstructionBinExport") -> None:
def _parse_protobuf(
self,
program: "ProgramBinExport",
function: "FunctionBinExport",
instruction: "InstructionBinExport",
) -> None:
"""
Low-level expression parser. It populates self._type and self._value
"""
Expand All @@ -123,9 +127,7 @@ def _parse_protobuf(self,
if self.pb_expr.immediate in instruction.data_refs: # Data
self.is_addr = True
self.is_data = True
elif (
self.pb_expr.immediate in program or self.pb_expr.immediate in function
): # Address
elif self.pb_expr.immediate in program or self.pb_expr.immediate in function: # Address
self.is_addr = True

elif self.pb_expr.type == BinExport2.Expression.IMMEDIATE_FLOAT:
Expand All @@ -149,4 +151,6 @@ def _parse_protobuf(self,
self._value = self.pb_expr.symbol

else:
logging.error(f"Malformed protobuf message. Invalid expression type {self.pb_expr.type}")
logging.error(
f"Malformed protobuf message. Invalid expression type {self.pb_expr.type}"
)
47 changes: 34 additions & 13 deletions binexport/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ class FunctionBinExport:
Also references its parents and children (function it calls).
"""

def __init__(self,
program: weakref.ref["ProgramBinExport"],
*,
pb_fun: "BinExport2.FlowGraph | None" = None,
is_import: bool = False,
addr: Addr | None = None):
def __init__(
self,
program: weakref.ref["ProgramBinExport"],
*,
pb_fun: "BinExport2.FlowGraph | None" = None,
is_import: bool = False,
addr: Addr | None = None,
):
"""
Constructor. Iterates the FlowGraph structure and initialize all the
basic blocks and instruction accordingly.
Expand All @@ -34,8 +36,8 @@ def __init__(self,
super(FunctionBinExport, self).__init__()

self.addr: Addr | None = addr #: address, None if imported function
self.parents: Set['FunctionBinExport'] = set() #: set of function call this one
self.children: Set['FunctionBinExport'] = set() #: set of functions called by this one
self.parents: Set["FunctionBinExport"] = set() #: set of function call this one
self.children: Set["FunctionBinExport"] = set() #: set of functions called by this one

# Private attributes
self._graph = None # CFG. Loaded inside self.blocks
Expand Down Expand Up @@ -110,15 +112,18 @@ def program(self) -> "ProgramBinExport":
"""
return self._program()

@cached_property
def blocks(self) -> Dict[Addr, BasicBlockBinExport]:
@property
def uncached_blocks(self) -> dict[Addr, BasicBlockBinExport]:
"""
Returns a dict which is used to reference all basic blocks by their address.
The dict is by default cached, to erase the cache delete the attribute.
Calling this function will also load the CFG.
The object returned is not cached, calling this function multiple times will
create the same object multiple times. If you want to cache the object you
should use `FunctionBinExport.blocks`.

:return: dictionary of addresses to basic blocks
"""

# Fast return if it is a imported function
if self.is_import():
if self._graph is None:
Expand All @@ -134,10 +139,14 @@ def blocks(self) -> Dict[Addr, BasicBlockBinExport]:
# Load the basic blocks
bb_i2a = {} # Map {basic block index -> basic block address}
for bb_idx in self._pb_fun.basic_block_index:
basic_block = BasicBlockBinExport(self._program, weakref.ref(self), self.program.proto.basic_block[bb_idx])
basic_block = BasicBlockBinExport(
self._program, weakref.ref(self), self.program.proto.basic_block[bb_idx]
)

if basic_block.addr in bblocks:
logging.error(f"0x{self.addr:x} basic block address (0x{basic_block.addr:x}) already in(idx:{bb_idx})")
logging.error(
f"0x{self.addr:x} basic block address (0x{basic_block.addr:x}) already in(idx:{bb_idx})"
)

bblocks[basic_block.addr] = basic_block
bb_i2a[bb_idx] = basic_block.addr
Expand All @@ -160,6 +169,18 @@ def blocks(self) -> Dict[Addr, BasicBlockBinExport]:

return bblocks

@cached_property
def blocks(self) -> Dict[Addr, BasicBlockBinExport]:
    """
    Cached mapping of all the basic blocks of the function, keyed by their address.
    The mapping is built on first access through `uncached_blocks` (which also
    loads the CFG) and then stored on the instance; delete the attribute to
    erase the cache.

    :return: dictionary of addresses to basic blocks
    """

    blocks_by_addr = self.uncached_blocks
    return blocks_by_addr

@property
def graph(self) -> networkx.DiGraph:
"""
Expand Down
24 changes: 20 additions & 4 deletions binexport/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ class InstructionBinExport:
Instruction class. It represents an instruction with its operands.
"""

def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.ref["FunctionBinExport"], addr: Addr, i_idx: int):
def __init__(
self,
program: weakref.ref["ProgramBinExport"],
function: weakref.ref["FunctionBinExport"],
addr: Addr,
i_idx: int,
):
"""
:param program: Weak reference to the program
:param function: Weak reference to the function
Expand Down Expand Up @@ -55,13 +61,23 @@ def mnemonic(self) -> str:
"""
return self.program.proto.mnemonic[self.pb_instr.mnemonic_index].name

@cached_property
def operands(self) -> List[OperandBinExport]:
@property
def uncached_operands(self) -> list[OperandBinExport]:
    """
    Instantiate, on demand, the operands of this instruction.
    Nothing is cached here: every access creates a new list holding new operand
    objects. Use `InstructionBinExport.operands` to obtain a cached list instead.

    :return: list of operands, one per entry of the protobuf ``operand_index``
    """
    operand_indices = self.pb_instr.operand_index
    return [
        # Each operand receives a weak reference to this instruction
        OperandBinExport(self._program, self._function, weakref.ref(self), operand_idx)
        for operand_idx in operand_indices
    ]

@cached_property
def operands(self) -> List[OperandBinExport]:
    """
    Cached list of the operands of this instruction, instantiated on demand.
    The list is built on first access through `uncached_operands` and then
    stored on the instance; delete the attribute to erase the cache.
    """
    operand_list = self.uncached_operands
    return operand_list
Loading