diff --git a/libbs/__init__.py b/libbs/__init__.py
index 52962ce..7d472e3 100644
--- a/libbs/__init__.py
+++ b/libbs/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "2.7.0"
+__version__ = "2.8.0"
 
 import logging
 
diff --git a/libbs/artifacts/__init__.py b/libbs/artifacts/__init__.py
index 709233a..659db42 100644
--- a/libbs/artifacts/__init__.py
+++ b/libbs/artifacts/__init__.py
@@ -1,3 +1,8 @@
+import json
+from typing import Union
+
+import toml
+
 from .formatting import TomlHexEncoder, ArtifactFormat
 from .artifact import Artifact
 from .comment import Comment
@@ -26,3 +31,89 @@
     Context.__name__: Context,
     Typedef.__name__: Typedef,
 }
+
+
+def _dict_from_str(art_str: str, fmt=ArtifactFormat.TOML) -> dict:
+    """
+    Parse a dumped artifact string into plain Python data.
+
+    :param art_str: The serialized artifact text.
+    :param fmt: The format the text was dumped in (TOML or JSON).
+    :return: The parsed data. A JSON document whose top level is an array parses to a list, not a dict.
+    :raises ValueError: If the format is not supported for loading.
+    """
+    if fmt == ArtifactFormat.TOML:
+        return toml.loads(art_str)
+    elif fmt == ArtifactFormat.JSON:
+        return json.loads(art_str)
+    else:
+        raise ValueError(f"Loading from format {fmt} is not yet supported.")
+
+
+def _art_from_dict(art_dict: dict) -> Artifact:
+    """
+    Build the concrete artifact described by a dict that was produced by an artifact's `dumps` method.
+
+    :param art_dict: Parsed artifact data that contains the `Artifact.ART_TYPE_STR` key.
+    :return: A new instance of the class named by the type key, populated from the remaining data.
+    :raises ValueError: If the data is not a dict, the type key is missing, or the type name is unknown.
+    """
+    if not isinstance(art_dict, dict):
+        raise ValueError(f"Unexpected artifact data type: {type(art_dict)}")
+
+    art_type_str = art_dict.get(Artifact.ART_TYPE_STR, None)
+    if art_type_str is None:
+        raise ValueError(f"Artifact type string not found in artifact data: {art_dict}. Is this a valid artifact?")
+
+    art_cls = ART_NAME_TO_CLS.get(art_type_str, None)
+    if art_cls is None:
+        raise ValueError(f"Unknown artifact type: {art_type_str!r}")
+
+    # as in Artifact.loads, drop the type key before restoring state; copy so the caller's dict is not mutated
+    state = {k: v for k, v in art_dict.items() if k != Artifact.ART_TYPE_STR}
+    art = art_cls()
+    art.__setstate__(state)
+    return art
+
+
+def _load_arts_from_list(art_strs: list[str], fmt=ArtifactFormat.TOML) -> list[Artifact]:
+    """
+    Load one artifact from each string in the list, preserving order.
+    """
+    return [_art_from_dict(_dict_from_str(art_str, fmt=fmt)) for art_str in art_strs]
+
+
+def _load_arts_from_string(art_str: str, fmt=ArtifactFormat.TOML) -> list[Artifact]:
+    """
+    Load every artifact contained in a single string.
+
+    The parsed document may be a dict whose values are artifact dicts or a list of artifact dicts.
+
+    :raises ValueError: If the parsed document is neither a dict nor a list.
+    """
+    data = _dict_from_str(art_str, fmt=fmt)
+    if isinstance(data, dict):
+        data_dicts = list(data.values())
+    elif isinstance(data, list):
+        data_dicts = data
+    else:
+        raise ValueError(f"Unexpected data type: {type(data)}")
+
+    return [_art_from_dict(data_dict) for data_dict in data_dicts]
+
+
+def load_many_artifacts(art_strings: Union[list[str], str], fmt=ArtifactFormat.TOML) -> list[Artifact]:
+    """
+    A helper function to load many dumped artifacts of possibly different types.
+
+    :param art_strings: Either a list of strings, each dumped using the `dumps` method of an artifact, or a single
+        string holding a dict (or list) whose values are such dumped artifacts.
+    :param fmt: The format of the dumped artifacts.
+    :return: The loaded artifacts, in the order they appear in the input.
+    :raises ValueError: If the format is unsupported or an entry does not carry a known artifact type.
+    """
+    # a str is itself iterable, so route it explicitly instead of parsing it one character at a time
+    if isinstance(art_strings, str):
+        return _load_arts_from_string(art_strings, fmt=fmt)
+
+    return _load_arts_from_list(art_strings, fmt=fmt)
diff --git a/libbs/artifacts/artifact.py b/libbs/artifacts/artifact.py
index c964c50..710f6ea 100644
--- a/libbs/artifacts/artifact.py
+++ b/libbs/artifacts/artifact.py
@@ -11,10 +11,12 @@ class Artifact:
     """
     The Artifact class acts as the base for all other artifacts that can be produced by a decompiler (or decompiler
     adjacent tool). In general, the comparisons of these derived classes should only be done on the attributes in
-    __slots__, with the exception of the last_change property.
+    __slots__, except for the last_change property.
     """
     LST_CHNG_ATTR = "last_change"
     ADDR_ATTR = "addr"
+    ART_TYPE_STR = "artifact_type"
+    ATTR_ATTR_IGNORE_SET = "_attr_ignore_set"
 
     __slots__ = (
         LST_CHNG_ATTR,
@@ -99,6 +101,8 @@ def _from_c_string(cls, cstring) -> Dict:
 
     def dumps(self, fmt=ArtifactFormat.TOML) -> str:
         dict_data = self.__getstate__()
+        # encode the artifact type so that load_many_artifacts can pick the right class when loading
+        dict_data[self.ART_TYPE_STR] = self.__class__.__name__
         if fmt == ArtifactFormat.TOML:
             return toml.dumps(dict_data, encoder=TomlHexEncoder())
         elif fmt == ArtifactFormat.JSON:
@@ -123,6 +127,8 @@ def loads(cls, string, fmt=ArtifactFormat.TOML) -> "Artifact":
         else:
             raise ValueError(f"Loading from format {fmt} is not yet supported.")
 
+        # remove the artifact type (if it exists)
+        dict_data.pop(Artifact.ART_TYPE_STR, None)
         art = cls()
         art.__setstate__(dict_data)
         return art
@@ -150,7 +156,7 @@ def dumps_many(cls, artifacts: List["Artifact"], key_attr=ADDR_ATTR, fmt=Artifac
             raise ValueError(f"Dumping many to format {fmt} is not yet supported.")
 
     @classmethod
-    def loads_many(cls, string, fmt=ArtifactFormat.TOML) -> List["Artifact"]:
+    def loads_many(cls, string: str, fmt=ArtifactFormat.TOML) -> List["Artifact"]:
         if fmt == ArtifactFormat.TOML:
             dict_data = toml.loads(string)
         elif fmt == ArtifactFormat.JSON:
diff --git a/tests/test_artifacts.py b/tests/test_artifacts.py
index 6d557d4..65051d6 100644
--- a/tests/test_artifacts.py
+++ b/tests/test_artifacts.py
@@ -5,7 +5,8 @@
 import toml
 
 from libbs.artifacts import (
-    FunctionHeader, StackVariable, FunctionArgument, Function, ArtifactFormat, Struct, StructMember
+    FunctionHeader, StackVariable, FunctionArgument, Function, ArtifactFormat, Struct, StructMember,
+    load_many_artifacts, Artifact
 )
 
 
@@ -170,6 +171,33 @@ def test_serialization(self):
         loaded_struct = Struct.loads(serialized_struct, fmt=fmt)
         assert loaded_struct == struct
 
+    def test_many_deserialization(self):
+        func, _ = generate_test_funcs(0x400000)
+        struct = Struct(name="some_struct", size=8, members={
+            0: StructMember(offset=0, name="m0", type_="int", size=4),
+            4: StructMember(offset=4, name="m4", type_="long", size=8)
+        })
+
+        # test loading many in a list of strings
+        data_strs = [func.dumps(fmt=ArtifactFormat.JSON), struct.dumps(fmt=ArtifactFormat.JSON)]
+        for data_str in data_strs:
+            data_dict = json.loads(data_str)
+            # the ART_TYPE_STR should be in the data to tell you what type of artifact it is
+            assert Artifact.ART_TYPE_STR in data_dict
+
+        loaded_arts = load_many_artifacts(data_strs, fmt=ArtifactFormat.JSON)
+        assert len(loaded_arts) == 2
+
+        loaded_func = loaded_arts[0]
+        assert loaded_func == func
+
+        loaded_struct = loaded_arts[1]
+        assert loaded_struct == struct
+
+        # test loading many from a single string that holds several dumped artifacts
+        combined_str = json.dumps({"func": json.loads(data_strs[0]), "struct": json.loads(data_strs[1])})
+        assert load_many_artifacts(combined_str, fmt=ArtifactFormat.JSON) == [func, struct]
+
 
 if __name__ == "__main__":
     unittest.main(argv=sys.argv)