From c40b0a84b97e3ed6273eeb26c03e77824df54b25 Mon Sep 17 00:00:00 2001 From: James Johnson Date: Mon, 6 Jan 2020 22:42:04 -0800 Subject: [PATCH 1/3] Almost done with consecutive implicit array support, still needs to finalize after finishing parsing --- pfp/fields.py | 58 +++++++++++++++++++++++++++++++++++++++++++- pfp/interp.py | 1 + tests/test_arrays.py | 13 ++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/pfp/fields.py b/pfp/fields.py index 5a6add1..b3d30ec 100644 --- a/pfp/fields.py +++ b/pfp/fields.py @@ -662,6 +662,38 @@ class Void(Field): pass +@inherit_hash +class ImplicitArrayWrapper(Field): + """ + """ + + last_field = None + implicit_array = None + + def __init__(self, last_field, implicit_array): + """Redirect all attribute accesses to the ``last_field``, except for + array indexing. Array indexing is forwarded on to the + ``implicit_array``. + """ + super(Field, self).__setattr__("last_field", last_field) + super(Field, self).__setattr__("implicit_array", implicit_array) + + def __setattr__(self, name, value): + """Custom setattr that forwards all sets to the last_field + """ + return setattr(self.last_field, name, value) + + def __getattr__(self, name): + """Custom getattr that forwards all gets to last_field. + """ + return getattr(self.last_field, name) + + def __getitem__(self, key): + """Let this ImplicitArrayWrapper act like an array + """ + return self.implicit_array[key] + + @inherit_hash class Struct(Field): """The struct field""" @@ -671,6 +703,10 @@ class Struct(Field): _pfp__children = [] """All children of the struct, in order added""" + _pfp__implicit_arrays = {} + """Mapping of all implicit arrays in this struct. 
All implicit arrays will + be resolved to a concrete array after parsing is complete""" + _pfp__name_collisions = {} """Counters for any naming collisions""" @@ -679,6 +715,8 @@ class Struct(Field): def __init__(self, stream=None, metadata_processor=None): # ordered list of children super(Struct, self).__setattr__("_pfp__children", []) + # initialize implicit arrays for this struct instance + super(Struct, self).__setattr__("_pfp__implicit_arrays", {}) # for quick child access super(Struct, self).__setattr__("_pfp__children_map", {}) @@ -747,7 +785,25 @@ def _pfp__add_child(self, name, child, stream=None, overwrite=False): ): return self._pfp__handle_non_consecutive_duplicate(name, child) elif not overwrite and name in self._pfp__children_map: - return self._pfp__handle_implicit_array(name, child) + implicit_array = self._pfp__handle_implicit_array(name, child) + + # see #110 (https://github.com/d0c-s4vage/pfp/issues/110) + # during parsing, duplicate (implicit) arrays should always + # reference the last parsed variable of ``name``. However, if the + # variable ``name`` is indexed, then the nth duplicate array item + # should be returned. + # + # E.g. 
+ # + # int x; + # int x; + # int x; + # Printf("%d\n", x); // prints the latest x value + # Printf("%d\n", x[0]); // prints the first x value + # + self._pfp__implicit_arrays[name] = implicit_array + self._pfp__children_map[name] = ImplicitArrayWrapper(child, implicit_array) + return child else: child._pfp__parent = self self._pfp__children.append(child) diff --git a/pfp/interp.py b/pfp/interp.py index 49e938d..ffe3705 100644 --- a/pfp/interp.py +++ b/pfp/interp.py @@ -817,6 +817,7 @@ def parse( self._dlog("parsed template into ast") res = self._run(keep_successful) + res._pfp__finalize() return res def step_over(self): diff --git a/tests/test_arrays.py b/tests/test_arrays.py index 28b30b5..fbe8992 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -85,6 +85,19 @@ def test_implicit_array_basic(self): self.assertEqual(dom.chars[2], ord("C")) self.assertEqual(dom.chars[3], ord("D")) + def test_implicit_array_same_behavior_as_010(self): + dom = self._test_parse_build( + "ABCD", + """ + while(!FEof()) { + char x; + Printf("%c", x); + } + """, + stdout="ABCD", + ) + __import__('pdb').set_trace() + def test_array_length1(self): dom = self._test_parse_build( "abcd", From 802d5bb737bf64e6236b09bc06a416b2528cc44b Mon Sep 17 00:00:00 2001 From: James Johnson Date: Tue, 7 Jan 2020 07:11:42 -0800 Subject: [PATCH 2/3] Done with the implementation, need to update the documentation to make the user aware of this current limitation in pfp. --- pfp/fields.py | 24 +++++++++++++++++++++--- tests/test_arrays.py | 3 ++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pfp/fields.py b/pfp/fields.py index b3d30ec..acfa035 100644 --- a/pfp/fields.py +++ b/pfp/fields.py @@ -728,6 +728,23 @@ def __init__(self, stream=None, metadata_processor=None): if stream is not None: self._pfp__offset = stream.tell() + def _pfp__finalize(self): + """Finalize the results of parsing the data. 
Currently this involves: + + * resolving implicit arrays to concrete arrays + """ + to_swap = [] + for child_name, child in six.iteritems(self._pfp__children_map): + if isinstance(child, Struct): + child._pfp__finalize() + continue + if child_name not in self._pfp__implicit_arrays: + continue + to_swap.append((child_name, child)) + + for child_name, child in to_swap: + self._pfp__children_map[child_name] = self._pfp__implicit_arrays[child_name] + def _pfp__snapshot(self, recurse=True): """Save off the current value of the field """ @@ -868,13 +885,14 @@ def _pfp__handle_implicit_array(self, name, child): """Handle inserting implicit array elements """ existing_child = self._pfp__children_map[name] - if isinstance(existing_child, Array): + existing_implicit_array = self._pfp__implicit_arrays.get(name, None) + if isinstance(existing_implicit_array, Array): # I don't think we should check this # # if existing_child.field_cls != child.__class__: # raise errors.PfpError("implicit arrays must be sequential!") - existing_child.append(child) - return existing_child + existing_implicit_array.append(child) + return existing_implicit_array else: cls = ( child._pfp__class diff --git a/tests/test_arrays.py b/tests/test_arrays.py index fbe8992..450995d 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -96,7 +96,8 @@ def test_implicit_array_same_behavior_as_010(self): """, stdout="ABCD", ) - __import__('pdb').set_trace() + self.assertIsInstance(dom.x, Array) + self.assertEqual(dom.x, b"ABCD") def test_array_length1(self): dom = self._test_parse_build( From dd71c41056869df9ac3b64c76244038af5a6dcff Mon Sep 17 00:00:00 2001 From: James Johnson Date: Tue, 7 Jan 2020 20:16:02 -0800 Subject: [PATCH 3/3] Updated documentation --- docs/source/differences.rst | 79 +++++++++++++++++++++++++++++++++++++ docs/source/index.rst | 7 ++++ 2 files changed, 86 insertions(+) create mode 100644 docs/source/differences.rst diff --git a/docs/source/differences.rst 
b/docs/source/differences.rst new file mode 100644 index 0000000..6ca5f91 --- /dev/null +++ b/docs/source/differences.rst @@ -0,0 +1,79 @@ + +.. _differences: + +Differences Between 010 and pfp +=============================== + +This section documents the known differences between pfp and 010 Editor. + +.. toctree:: + :maxdepth: 1 + +Duplicate Arrays +---------------- + +*TLDR*: Pfp does not [yet] support non-consecutive duplicate arrays. +Consecutive duplicate arrays are fully supported. + +First, some definitions and back story. + +Duplicate arrays are what occurs when multiple variables of the same name +are declared in the same scope. E.g.: + +.. code-block:: c + + int x; + int x; + if (x[0] == x[1] || x[0] == x) { + Printf("Same!"); + } + +The 010 template script above declares ``x`` twice, creating a duplicate, or +as pfp originally called it, an implicit array. Notice the two comparisons - +they actually perform the same comparison: + +.. code-block:: c + + x[0] == x[1] + +and + +.. code-block:: c + + x[0] == x + +In 010, if the duplicate/implicit array is referenced without indexing, the +most recently parsed field in the duplicate array is returned. I.e., it's treated +as a normal field and not an array. However, if indexing is done on the duplicate +array variable, the variable is treated as an array. + +Below is a quote on duplicate arrays from the +`010 Editor documentation `_: + + When writing a template, regular arrays can be declared using the same syntax + as scripts (see Arrays and Strings). However, 010 Editor has a syntax that + allows arrays to be built in a special way. When declaring template variables, + multiple copies of the same variable can be declared. For example: + + .. code-block:: c + + int x; + int y; + int x; + + 010 Editor allows you to treat the multiple declarations of the variable as + an array (this is called a Duplicate Array). 
In this example, x[0] could + be used to reference the first occurrence of x and x[1] could be used to + reference the second occurrence of x. Duplicate arrays can even be defined + with for or while loops. For example: + + .. code-block:: c + + local int i; + for( i = 0; i < 5; i++ ) + int x; + +This breaks down in pfp when non-consecutive arrays are created, as is done +in the first code sample from the 010 Editor documentation above. +`Issue #111 <https://github.com/d0c-s4vage/pfp/issues/111>`_ tracks the effort +to add support for non-consecutive duplicate arrays. diff --git a/docs/source/index.rst b/docs/source/index.rst index 9d0c67f..2c8a1b3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -106,6 +106,12 @@ find the ``tEXt`` chunk, and change the comment: :: chunk.data.tEXt.comment = "NEW COMMENT" print("Comment after: {}".format(chunk.data.tEXt.comment)) +Notes +----- + +A few differences do exist between 010 Editor and pfp. See the +:ref:`differences` section for specific, documented differences. + Contents: @@ -120,6 +126,7 @@ Contents: interpreter functions bitstream + differences .. automodule:: pfp :members: