From 1ce07f01364cf2cfc5e928f94037ba5c75cb2897 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 17 Nov 2023 15:05:25 +0100 Subject: [PATCH] Cache objects below basic blocks. Functions can preload blocks --- src/binexport/basic_block.py | 21 +++---------- src/binexport/function.py | 60 +++++++++++++++++++++++++++--------- src/binexport/instruction.py | 20 +++++------- src/binexport/operand.py | 20 +++--------- 4 files changed, 61 insertions(+), 60 deletions(-) diff --git a/src/binexport/basic_block.py b/src/binexport/basic_block.py index 9e8bd5d..6cdae85 100644 --- a/src/binexport/basic_block.py +++ b/src/binexport/basic_block.py @@ -11,6 +11,7 @@ from .function import FunctionBinExport from .binexport2_pb2 import BinExport2 + class BasicBlockBinExport: """ Basic block class. @@ -81,14 +82,12 @@ def function(self) -> "FunctionBinExport": """ return self._function() - @property - def uncached_instructions(self) -> dict[Addr, InstructionBinExport]: + @cached_property + def instructions(self) -> dict[Addr, InstructionBinExport]: """ Returns a dict which is used to reference all the instructions in this basic block by their address. - The object returned is not cached, calling this function multiple times will - create the same object multiple times. If you want to cache the object you - should use `BasicBlockBinExport.instructions`. + The object returned is by default cached, to erase the cache delete the attribute. :return: dictionary of addresses to instructions """ @@ -108,15 +107,3 @@ def uncached_instructions(self) -> dict[Addr, InstructionBinExport]: ) return instructions - - @cached_property - def instructions(self) -> dict[Addr, InstructionBinExport]: - """ - Returns a dict which is used to reference all the instructions in this basic - block by their address. - The object returned is by default cached, to erase the cache delete the attribute. 
- - :return: dictionary of addresses to instructions - """ - - return self.uncached_instructions diff --git a/src/binexport/function.py b/src/binexport/function.py index 667e868..7907d2a 100644 --- a/src/binexport/function.py +++ b/src/binexport/function.py @@ -49,6 +49,8 @@ def __init__( self._name = None # Set by the Program constructor self._program = program self._pb_fun = pb_fun + self._enable_unloading = False + self._basic_blocks = None if is_import: if self.addr is None: @@ -70,6 +72,29 @@ def __hash__(self) -> int: def __repr__(self) -> str: return "<%s: 0x%x>" % (type(self).__name__, self.addr) + def __enter__(self) -> None: + """Preload basic blocks and don't deallocate them until __exit__ is called""" + + self._enable_unloading = False + self.preload() + + def __exit__(self, exc_type, exc_value, traceback) -> None: + """Deallocate all the basic blocks""" + + self._enable_unloading = True + self.unload() + + def preload(self) -> None: + """Load in memory all the basic blocks""" + + self._basic_blocks = self.blocks + + def unload(self) -> None: + """Unload from memory all the basic blocks""" + + if self._enable_unloading: + self._basic_blocks = None + def items(self) -> abc.ItemsView[Addr, "BasicBlockBinExport"]: """ Each function is associated to a dictionary with key-value @@ -117,17 +142,34 @@ def program(self) -> "ProgramBinExport": return self._program() @property - def uncached_blocks(self) -> dict[Addr, BasicBlockBinExport]: + def blocks(self) -> Dict[Addr, BasicBlockBinExport]: """ Returns a dict which is used to reference all basic blocks by their address. Calling this function will also load the CFG. - The object returned is not cached, calling this function multiple times will + By default the object returned is not cached, calling this function multiple times will create the same object multiple times. If you want to cache the object you - should use `FunctionBinExport.blocks`. 
+ should use the context manager of the function or call `FunctionBinExport.preload`. + Ex: + + .. code-block:: python + :linenos: + + # func: FunctionBinExport + with func: # Loading all the basic blocks + for bb_addr, bb in func.blocks.items(): # Blocks are already loaded + pass + # The blocks are still loaded + for bb_addr, bb in func.blocks.items(): + pass + # here the blocks have been unloaded :return: dictionary of addresses to basic blocks """ + # Check if the blocks are already loaded + if self._basic_blocks is not None: + return self._basic_blocks + # Fast return if it is a imported function if self.is_import(): if self._graph is None: @@ -177,18 +219,6 @@ def uncached_blocks(self) -> dict[Addr, BasicBlockBinExport]: return bblocks - @cached_property - def blocks(self) -> Dict[Addr, BasicBlockBinExport]: - """ - Returns a dict which is used to reference all basic blocks by their address. - Calling this function will also load the CFG. - The dict is by default cached, to erase the cache delete the attribute. - - :return: dictionary of addresses to basic blocks - """ - - return self.uncached_blocks - @property def graph(self) -> networkx.DiGraph: """ diff --git a/src/binexport/instruction.py b/src/binexport/instruction.py index c9214f6..9f10361 100644 --- a/src/binexport/instruction.py +++ b/src/binexport/instruction.py @@ -10,6 +10,7 @@ from .function import FunctionBinExport from .binexport2_pb2 import BinExport2 + class InstructionBinExport: """ Instruction class. It represents an instruction with its operands. @@ -65,23 +66,16 @@ def mnemonic(self) -> str: """ return self.program.proto.mnemonic[self.pb_instr.mnemonic_index].name - @property - def uncached_operands(self) -> list[OperandBinExport]: + @cached_property + def operands(self) -> List[OperandBinExport]: """ Returns a list of the operands instanciated dynamically on-demand. 
- The object returned is not cached, calling this function multiple times will - create the same object multiple times. If you want to cache the object you - should use `InstructionBinExport.operands`. + The list is cached by default, to erase the cache delete the attribute. + + :return: list of operands """ + return [ OperandBinExport(self._program, self._function, weakref.ref(self), op_idx) for op_idx in self.pb_instr.operand_index ] - - @cached_property - def operands(self) -> List[OperandBinExport]: - """ - Returns a list of the operands instanciated dynamically on-demand. - The list is cached by default, to erase the cache delete the attribute. - """ - return self.uncached_operands diff --git a/src/binexport/operand.py b/src/binexport/operand.py index 9e4dcd3..b35a6b5 100644 --- a/src/binexport/operand.py +++ b/src/binexport/operand.py @@ -114,14 +114,14 @@ def pb_operand(self) -> "BinExport2.Operand": """ return self.program.proto.operand[self._idx] - @property - def uncached_expressions(self) -> List[ExpressionBinExport]: + @cached_property + def expressions(self) -> List[ExpressionBinExport]: """ Iterates over all the operand expression in a pre-order manner (binary operator first). - The object returned is not cached, calling this function multiple times will - create the same object multiple times. If you want to cache the object you - should use `OperandBinExport.expressions`. + The list is cached by default, to erase the cache delete the attribute + + :return: list of expressions """ expr_dict = {} # {expression protobuf idx : ExpressionBinExport} @@ -133,13 +133,3 @@ def uncached_expressions(self) -> List[ExpressionBinExport]: self.program, self.function, self.instruction, exp_idx, parent ) return list(expr_dict.values()) - - @cached_property - def expressions(self) -> List[ExpressionBinExport]: - """ - Iterates over all the operand expression in a pre-order manner - (binary operator first). 
- The list is cached by default, to erase the cache delete the attribute - """ - - return self.uncached_expressions