Skip to content

Commit

Permalink
Merge pull request #54 from quarkslab/add_csv_exporter
Browse files Browse the repository at this point in the history
Add csv exporter
  • Loading branch information
RobinDavid authored Feb 13, 2024
2 parents 58ff490 + 9743497 commit e601217
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 165 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ The complete command line options are:
-e1, --executable1 PATH Path to the primary raw executable. Must be provided if using quokka loader
-e2, --executable2 PATH Path to the secondary raw executable. Must be provided if using quokka loader
-o, --output PATH Write output to PATH
-ff, --file-format [bindiff] The file format of the output file. Supported formats are [bindiff] [default:
bindiff]
-ff, --file-format [bindiff|csv]
The file format of the output file [default: csv]
-v, --verbose Activate debugging messages. Can be supplied multiple times to increase verbosity
--version Show the version and exit.
--arch-primary TEXT Force the architecture when disassembling for the primary. Format is
Expand Down
10 changes: 10 additions & 0 deletions doc/source/api/differ.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ GenericGraph
:undoc-members:
:exclude-members:

GenericNode
-----------

.. autoclass:: qbindiff.GenericNode
:members:
:show-inheritance:
:inherited-members:
:undoc-members:
:exclude-members:

Differ
------

Expand Down
47 changes: 18 additions & 29 deletions doc/source/export.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,32 @@ Given a ``differ`` object initialized, with two binaries to diffs, the diffing a
.. code-block:: python
matches = differ.compute_matching()
differ.export_to_bindiff('/path/to/output.BinDiff'))
differ.export_to_bindiff('/path/to/output.BinDiff')
CSV
---

If the diff does not represent a binary diff, or if it needs further processing,
can also be saved in .csv file.
it can also be saved in a .csv file.
This is the default file format as it is very lightweight and fast to generate.

TODO: We really have to write the CSV ourselves ? There is not utility functions?
It can be obtained either with the CLI option ``-ff csv`` or through the API, by calling ``to_csv`` on the computed mapping as follows:

.. code-block:: python
import csv
from qbindiff.loader.types import FunctionType
matches = differ.compute_matching()
matches: Mapping = differ.compute_matching()
# This only exports base fields (address, similarity, confidence)
matches.to_csv("/path/to/output.csv")
# Add extra "name" field
matches.to_csv("/path/to/output.csv", "name")
with open('/path/to/output.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow((
'path_primary',
'func_addr_primary',
'func_name_primary',
'path_secondary',
'func_addr_secondary',
'func_name_secondary',
'similarity',
'confidence'
))
for match in matches:
writer.writerow((
differ.primary.name,
hex(match.primary.addr),
match.primary.name,
differ.secondary.name,
hex(match.secondary.addr),
match.primary.name,
match.similarity,
match.confidence
))
# Add extra "name" field and custom field
matches.to_csv(
"/path/to/output.csv",
"name",
("is_library", lambda f: f.type == FunctionType.library)
)
2 changes: 1 addition & 1 deletion src/qbindiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"""

from qbindiff.version import __version__
from qbindiff.abstract import GenericGraph
from qbindiff.abstract import GenericGraph, GenericNode
from qbindiff.differ import QBinDiff, DiGraphDiffer, GraphDiffer, Differ
from qbindiff.mapping import Mapping
from qbindiff.loader import Program, Function
Expand Down
8 changes: 5 additions & 3 deletions src/qbindiff/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ def list_features(ctx: click.Context, param: click.Parameter, value: Any) -> Non
"-ff",
"--file-format",
show_default=True,
default="bindiff",
type=click.Choice(["bindiff"]),
help=f"The file format of the output file. Supported formats are [bindiff]",
default="csv",
type=click.Choice(["bindiff", "csv"]),
help=f"The file format of the output file",
)
@click.option(
"-v",
Expand Down Expand Up @@ -385,6 +385,8 @@ def main(
logging.info("[+] Saving")
if file_format == "bindiff":
qbindiff.export_to_bindiff(output)
elif file_format == "csv":
qbindiff.mapping.to_csv(output, ("name", lambda f: f.name))
logging.info("[+] Mapping successfully saved to: %s" % output)


Expand Down
58 changes: 45 additions & 13 deletions src/qbindiff/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,30 @@
throughout the qbindiff module (the differ, the matcher, the exporters, etc...).
"""

from __future__ import annotations
from abc import ABCMeta, abstractmethod
from collections.abc import Iterator
from typing import Any
from collections.abc import Hashable
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Iterable
from typing import Any
from qbindiff.types import NodeLabel


class GenericNode(Hashable):
    """
    Abstract class representing a generic node.

    It derives from :py:class:`collections.abc.Hashable`, so concrete node
    types are expected to be hashable, and they must implement
    :py:meth:`get_label`.
    """

    @abstractmethod
    def get_label(self) -> NodeLabel:
        """
        Get the label associated with this node.

        :returns: The node label associated with this node
        """
        raise NotImplementedError()


class GenericGraph(metaclass=ABCMeta):
Expand All @@ -34,40 +55,51 @@ def __len__(self) -> int:
raise NotImplementedError()

@abstractmethod
def items(self) -> Iterator[tuple[Any, Any]]:
def items(self) -> Iterable[tuple[NodeLabel, GenericNode]]:
"""
Return an iterator over the items. Each item is {node_label: node}
Iterate over the items. Each item is a pair (node_label, node)
:returns: A :py:class:`Iterable` over the items. Each item is
a tuple (node_label, node)
"""
raise NotImplementedError()

@abstractmethod
def get_node(self, node_label: Any):
def get_node(self, node_label: NodeLabel) -> GenericNode:
"""
Returns the node identified by the `node_label`
Get the node identified by the `node_label`
:param node_label: the unique identifier of the node
:returns: The node identified by the label
"""
raise NotImplementedError()

@property
@abstractmethod
def node_labels(self) -> Iterator[Any]:
def node_labels(self) -> Iterable[NodeLabel]:
"""
Return an iterator over the node labels
Iterate over the node labels
:returns: An :py:class:`Iterable` over the node labels
"""
raise NotImplementedError()

@property
@abstractmethod
def nodes(self) -> Iterator[Any]:
def nodes(self) -> Iterable[GenericNode]:
"""
Return an iterator over the nodes
Iterate over the nodes themselves
:returns: An :py:class:`Iterable` over the nodes
"""
raise NotImplementedError()

@property
@abstractmethod
def edges(self) -> Iterator[tuple[Any, Any]]:
def edges(self) -> Iterable[tuple[NodeLabel, NodeLabel]]:
"""
Return an iterator over the edges.
An edge is a pair (node_label_a, node_label_b)
Iterate over the edges. An edge is a pair (node_label_a, node_label_b)
:returns: An :py:class:`Iterable` over the edges.
"""
raise NotImplementedError()
34 changes: 24 additions & 10 deletions src/qbindiff/loader/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,25 @@
"""

from __future__ import annotations
import networkx
from collections.abc import Mapping, Generator
from collections.abc import Mapping
from typing import TYPE_CHECKING

from qbindiff.abstract import GenericNode
from qbindiff.loader import BasicBlock
from qbindiff.loader.types import FunctionType
from qbindiff.types import Addr
from qbindiff.loader.backend.abstract import AbstractFunctionBackend

if TYPE_CHECKING:
import networkx
from collections.abc import Generator
from qbindiff.loader.backend.abstract import AbstractFunctionBackend
from qbindiff.types import Addr

class Function(Mapping[Addr, BasicBlock]):

class Function(Mapping, GenericNode):
"""
Representation of a binary function.
This class is a dict of basic block addreses to the basic block.
This class is an immutable mapping from a basic block's address to the basic block itself.
It lazily loads all the basic blocks when iterating through them or even accessing
one of them and it unloads all of them after the iteration has ended.
Expand All @@ -50,7 +55,7 @@ class Function(Mapping[Addr, BasicBlock]):
"""

def __init__(self, backend: AbstractFunctionBackend):
super(Function, self).__init__()
super().__init__()

# The basic blocks are lazily loaded
self._basic_blocks = None
Expand Down Expand Up @@ -94,7 +99,7 @@ def __getitem__(self, key: Addr) -> BasicBlock:
self._unload()
return bb

def __iter__(self) -> Generator[BasicBlock]:
def __iter__(self) -> Generator[BasicBlock, None, None]:
"""
Iterate over basic blocks, not addresses
"""
Expand All @@ -115,11 +120,11 @@ def __len__(self) -> int:
self._unload()
return size

def items(self) -> Generator[Addr, BasicBlock]:
def items(self) -> Generator[tuple[Addr, BasicBlock], None, None]:
"""
Returns a generator of tuples with addresses of basic blocks and the corresponding basic blocks objects
:return: generator (addr, basicblock)
:returns: generator (addr, basicblock)
"""

if self._basic_blocks is not None:
Expand Down Expand Up @@ -151,6 +156,15 @@ def _unload(self) -> None:
self._basic_blocks = None
self._backend.unload_blocks()

def get_label(self) -> Addr:
    """
    Get the label of this function, which is its address.

    :returns: The address associated with the function
    """

    return self.addr

@property
def edges(self) -> list[tuple[Addr, Addr]]:
"""
Expand Down
Loading

0 comments on commit e601217

Please sign in to comment.