Skip to content

Commit

Permalink
Add "artifact_type" to Artifact dumping format (#137)
Browse files Browse the repository at this point in the history
* Add "artifact_type" to Artifact dumping format

* remove dead code
  • Loading branch information
mahaloz authored Nov 24, 2024
1 parent 669215d commit 067dac5
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 4 deletions.
2 changes: 1 addition & 1 deletion libbs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.7.0"
__version__ = "2.8.0"


import logging
Expand Down
61 changes: 61 additions & 0 deletions libbs/artifacts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import json

import toml

from .formatting import TomlHexEncoder, ArtifactFormat
from .artifact import Artifact
from .comment import Comment
Expand Down Expand Up @@ -26,3 +30,60 @@
Context.__name__: Context,
Typedef.__name__: Typedef,
}


def _dict_from_str(art_str: str, fmt=ArtifactFormat.TOML) -> dict:
    """
    Decode serialized artifact text into plain Python data.

    :param art_str: The serialized text to decode.
    :param fmt: The format the text was written in; TOML and JSON are handled.
    :return: The value produced by the parser that matches `fmt`.
    :raises ValueError: If `fmt` is neither TOML nor JSON.
    """
    # Select the parser first so the actual decode happens in exactly one place.
    if fmt == ArtifactFormat.TOML:
        parse = toml.loads
    elif fmt == ArtifactFormat.JSON:
        parse = json.loads
    else:
        raise ValueError(f"Loading from format {fmt} is not yet supported.")

    return parse(art_str)


def _art_from_dict(art_dict: dict) -> Artifact:
    """
    Rebuild an artifact from decoded dump data, choosing the class from its type tag.

    The tag stored under `Artifact.ART_TYPE_STR` is looked up in `ART_NAME_TO_CLS`; an
    instance of the matching class is created with no arguments and populated through
    `__setstate__`.

    :param art_dict: Decoded artifact data. It is not modified.
    :return: The reconstructed artifact.
    :raises ValueError: If the type tag is missing, or does not name a class in `ART_NAME_TO_CLS`.
    """
    art_type_str = art_dict.get(Artifact.ART_TYPE_STR, None)
    if art_type_str is None:
        raise ValueError(f"Artifact type string not found in artifact data: {art_dict}. Is this a valid artifact?")

    try:
        art_cls = ART_NAME_TO_CLS[art_type_str]
    except (KeyError, TypeError) as err:
        # Report an unrecognized (or unhashable) tag the same way as a missing one,
        # rather than leaking a bare lookup error with no context.
        raise ValueError(f"Unknown artifact type {art_type_str!r} in artifact data.") from err

    # The type tag is dump metadata, not artifact state: drop it before restoring, as
    # Artifact.loads does. Work on a copy so the caller's dict is left untouched.
    state = {key: value for key, value in art_dict.items() if key != Artifact.ART_TYPE_STR}

    art = art_cls()
    art.__setstate__(state)
    return art


def _load_arts_from_list(art_strs: list[str], fmt=ArtifactFormat.TOML) -> list[Artifact]:
    """
    Load one artifact from each string in `art_strs`.

    :param art_strs: Serialized artifacts, one per string.
    :param fmt: The format every string is written in.
    :return: The loaded artifacts, in the same order as `art_strs`.
    """
    # Decode each string, then build the artifact from the decoded data.
    return [_art_from_dict(_dict_from_str(dumped, fmt=fmt)) for dumped in art_strs]


def _load_arts_from_string(art_str: str, fmt=ArtifactFormat.TOML) -> list[Artifact]:
    """
    Load every artifact held in a single serialized string.

    The decoded top-level value may be a dict, whose values are taken as the artifact
    entries (its keys are ignored), or a list, whose items are taken as the entries.

    :param art_str: Serialized text holding several artifacts.
    :param fmt: The format the text is written in.
    :return: One artifact per entry, in the order the entries appear.
    :raises ValueError: If the decoded top-level value is neither a dict nor a list.
    """
    parsed = _dict_from_str(art_str, fmt=fmt)

    if isinstance(parsed, list):
        entries = parsed
    elif isinstance(parsed, dict):
        entries = list(parsed.values())
    else:
        raise ValueError(f"Unexpected data type: {type(parsed)}")

    return [_art_from_dict(entry) for entry in entries]


def load_many_artifacts(art_strings: "list[str] | str", fmt=ArtifactFormat.TOML) -> list[Artifact]:
    """
    A helper function to load many dumped artifacts, either from a list of strings or from a single string.

    When given a list, each string should have been dumped using the `dumps` method of an artifact, so that
    it carries the artifact type needed to pick the class to rebuild. When given a single string, it must
    decode to a dict (whose values are the artifacts) or a list of artifacts, and every entry must carry
    its artifact type.

    :param art_strings: A list of strings or a single string containing multiple dumped artifacts.
    :param fmt: The format of the dumped artifacts.
    :return: The loaded artifacts, in input order.
    :raises ValueError: If the format is unsupported or an entry has no usable artifact type.
    """
    # A str is itself iterable: without this check a single string would be walked
    # character by character and each character parsed as its own artifact.
    if isinstance(art_strings, str):
        return _load_arts_from_string(art_strings, fmt=fmt)

    return _load_arts_from_list(art_strings, fmt=fmt)
10 changes: 8 additions & 2 deletions libbs/artifacts/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ class Artifact:
"""
The Artifact class acts as the base for all other artifacts that can be produced by a decompiler (or decompiler
adjacent tool). In general, the comparisons of these derived classes should only be done on the attributes in
__slots__, with the exception of the last_change property.
__slots__, except for the last_change property.
"""
LST_CHNG_ATTR = "last_change"
ADDR_ATTR = "addr"
ART_TYPE_STR = "artifact_type"

ATTR_ATTR_IGNORE_SET = "_attr_ignore_set"
__slots__ = (
LST_CHNG_ATTR,
Expand Down Expand Up @@ -99,6 +101,8 @@ def _from_c_string(cls, cstring) -> Dict:

def dumps(self, fmt=ArtifactFormat.TOML) -> str:
dict_data = self.__getstate__()
# encode the artifact type
dict_data.update({self.ART_TYPE_STR: self.__class__.__name__})
if fmt == ArtifactFormat.TOML:
return toml.dumps(dict_data, encoder=TomlHexEncoder())
elif fmt == ArtifactFormat.JSON:
Expand All @@ -123,6 +127,8 @@ def loads(cls, string, fmt=ArtifactFormat.TOML) -> "Artifact":
else:
raise ValueError(f"Loading from format {fmt} is not yet supported.")

# remove the artifact type (if it exists)
dict_data.pop(Artifact.ART_TYPE_STR, None)
art = cls()
art.__setstate__(dict_data)
return art
Expand Down Expand Up @@ -150,7 +156,7 @@ def dumps_many(cls, artifacts: List["Artifact"], key_attr=ADDR_ATTR, fmt=Artifac
raise ValueError(f"Dumping many to format {fmt} is not yet supported.")

@classmethod
def loads_many(cls, string, fmt=ArtifactFormat.TOML) -> List["Artifact"]:
def loads_many(cls, string: str, fmt=ArtifactFormat.TOML) -> List["Artifact"]:
if fmt == ArtifactFormat.TOML:
dict_data = toml.loads(string)
elif fmt == ArtifactFormat.JSON:
Expand Down
26 changes: 25 additions & 1 deletion tests/test_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

import toml
from libbs.artifacts import (
FunctionHeader, StackVariable, FunctionArgument, Function, ArtifactFormat, Struct, StructMember
FunctionHeader, StackVariable, FunctionArgument, Function, ArtifactFormat, Struct, StructMember,
load_many_artifacts, Artifact
)


Expand Down Expand Up @@ -170,6 +171,29 @@ def test_serialization(self):
loaded_struct = Struct.loads(serialized_struct, fmt=fmt)
assert loaded_struct == struct

def test_many_deserialization(self):
    """Round-trip a function and a struct through JSON using `load_many_artifacts`."""
    func, _ = generate_test_funcs(0x400000)
    members = {
        0: StructMember(offset=0, name="m0", type_="int", size=4),
        4: StructMember(offset=4, name="m4", type_="long", size=8),
    }
    struct = Struct(name="some_struct", size=8, members=members)

    # test loading many in a list of strings: one JSON dump per artifact
    dumped = [art.dumps(fmt=ArtifactFormat.JSON) for art in (func, struct)]

    # the ART_TYPE_STR should be in the data to tell you what type of artifact it is
    for raw in dumped:
        assert Artifact.ART_TYPE_STR in json.loads(raw)

    loaded_arts = load_many_artifacts(dumped, fmt=ArtifactFormat.JSON)
    assert len(loaded_arts) == 2

    # order is preserved, so the function comes back first and the struct second
    loaded_func, loaded_struct = loaded_arts
    assert loaded_func == func
    assert loaded_struct == struct


if __name__ == "__main__":
unittest.main(argv=sys.argv)

0 comments on commit 067dac5

Please sign in to comment.