From 0dad549b7d21aa651d919fd7a71f98ec8438544a Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Mon, 25 Mar 2024 02:54:07 +0000 Subject: [PATCH 1/4] dnfile 0.15.0 changed API --- capa/features/extractors/dnfile/helpers.py | 42 +++++++++++----------- capa/features/extractors/dotnetfile.py | 4 +-- pyproject.toml | 2 +- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 811568497..ac2ce9a57 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str return None try: - user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) + user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid) except UnicodeDecodeError as e: logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e) return None @@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: access: Optional[str] # assume .NET imports starting with get_/set_ are used to access a property - if member_ref.Name.startswith("get_"): + if str(member_ref.Name).startswith("get_"): access = FeatureAccess.READ - elif member_ref.Name.startswith("set_"): + elif str(member_ref.Name).startswith("set_"): access = FeatureAccess.WRITE else: access = None - member_ref_name: str = member_ref.Name + member_ref_name: str = str(member_ref.Name) if member_ref_name.startswith(("get_", "set_")): # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] @@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: token: int = calculate_dotnet_token_value(method.table.number, method.row_index) access: Optional[str] = accessor_map.get(token) - method_name: str = method.row.Name + method_name: str = str(method.row.Name) if method_name.startswith(("get_", "set_")): # remove get_/set_ method_name = method_name[4:] @@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] logger.debug("ImplMap[0x%X] ImportScope row is None", rid) module = "" else: - module = impl_map.ImportScope.row.Name - method: str = impl_map.ImportName + module = str(impl_map.ImportScope.row.Name) + method: str = str(impl_map.ImportName) member_forward_table: int if impl_map.MemberForwarded.table is None: @@ -334,7 +334,7 @@ def resolve_nested_typedef_name( if index in nested_class_table: typedef_name = [] - name = typedef.TypeName + name = str(typedef.TypeName) # Append the current typedef name typedef_name.append(name) @@ -343,24 +343,24 @@ def resolve_nested_typedef_name( # Iterate through the typedef table to resolve the nested name table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: - return typedef.TypeNamespace, tuple(typedef_name[::-1]) + return str(typedef.TypeNamespace), tuple(typedef_name[::-1]) - name = table_row.TypeName + name = str(table_row.TypeName) typedef_name.append(name) index = nested_class_table[index] # Document the root enclosing details table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: - return typedef.TypeNamespace, tuple(typedef_name[::-1]) + return str(typedef.TypeNamespace), tuple(typedef_name[::-1]) - enclosing_name = table_row.TypeName + enclosing_name = str(table_row.TypeName) typedef_name.append(enclosing_name) - return table_row.TypeNamespace, tuple(typedef_name[::-1]) + return str(table_row.TypeNamespace), tuple(typedef_name[::-1]) else: - return typedef.TypeNamespace, (typedef.TypeName,) + return str(typedef.TypeNamespace), (str(typedef.TypeName),) def resolve_nested_typeref_name( @@ -370,29 +370,29 @@ def resolve_nested_typeref_name( # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): typeref_name = [] - name = typeref.TypeName + name = str(typeref.TypeName) # Not appending the current typeref name to avoid potential duplicate # Validate index table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index) if table_row is None: - return typeref.TypeNamespace, (typeref.TypeName,) + return str(typeref.TypeNamespace), (str(typeref.TypeName),) while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): # Iterate through the typeref table to resolve the nested name typeref_name.append(name) - name = table_row.TypeName + name = str(table_row.TypeName) table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index) if table_row is None: - return typeref.TypeNamespace, tuple(typeref_name[::-1]) + return str(typeref.TypeNamespace), tuple(typeref_name[::-1]) # Document the root enclosing details - typeref_name.append(table_row.TypeName) + typeref_name.append(str(table_row.TypeName)) - return table_row.TypeNamespace, tuple(typeref_name[::-1]) + return str(table_row.TypeNamespace), tuple(typeref_name[::-1]) else: - return typeref.TypeNamespace, (typeref.TypeName,) + return str(typeref.TypeNamespace), (str(typeref.TypeName),) def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]: diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 75bf32dcb..5ab998579 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET namespaces assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - namespaces.add(typedef.TypeNamespace) + namespaces.add(str(typedef.TypeNamespace)) for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): # emit external .NET namespaces assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - namespaces.add(typeref.TypeNamespace) + namespaces.add(str(typeref.TypeNamespace)) # namespaces may be empty, discard namespaces.discard("") diff --git a/pyproject.toml b/pyproject.toml index 55b043fa9..f3ad0237f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "vivisect==1.1.1", "pefile==2023.2.7", "pyelftools==0.30", - "dnfile==0.14.1", + "dnfile==0.15.0", "dncil==1.0.2", "pydantic==2.4.0", "protobuf==4.23.4", From 0b615b11700e43c7285d292f4aaab12d2c043b85 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Sun, 19 May 2024 04:22:18 +0000 Subject: [PATCH 2/4] deduplicate str() calls and isort fixes --- capa/features/extractors/dnfile/helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index ac2ce9a57..a1c1e590d 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,13 +9,13 @@ from __future__ import annotations import logging -from typing import Dict, Tuple, Union, Iterator, Optional +from typing import Dict, Iterator, Optional, Tuple, Union import dnfile from dncil.cil.body import CilMethodBody -from dncil.cil.error import MethodBodyFormatError -from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.body.reader import CilMethodBodyReaderBase +from dncil.cil.error import MethodBodyFormatError +from dncil.clr.token import InvalidToken, StringToken, Token from capa.features.common import FeatureAccess from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod @@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: access: Optional[str] # assume .NET imports starting with get_/set_ are used to access a property - if str(member_ref.Name).startswith("get_"): + member_ref_name: str = str(member_ref.Name) + if member_ref_name.startswith("get_"): access = FeatureAccess.READ - elif str(member_ref.Name).startswith("set_"): + elif member_ref_name.startswith("set_"): access = FeatureAccess.WRITE else: access = None - member_ref_name: str = str(member_ref.Name) if member_ref_name.startswith(("get_", "set_")): # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] From 9548a39c3a80990e9b07984a670619df1afaf654 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Sun, 19 May 2024 04:29:58 +0000 Subject: [PATCH 3/4] revert accidental change to imports ordering --- capa/features/extractors/dnfile/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index a1c1e590d..a85e855d3 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,13 +9,13 @@ from __future__ import annotations import logging -from typing import Dict, Iterator, Optional, Tuple, Union +from typing import Dict, Tuple, Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody -from dncil.cil.body.reader import CilMethodBodyReaderBase from dncil.cil.error import MethodBodyFormatError -from dncil.clr.token import InvalidToken, StringToken, Token +from dncil.clr.token import Token, StringToken, InvalidToken +from dncil.cil.body.reader import CilMethodBodyReaderBase from capa.features.common import FeatureAccess from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod From 433a685d9499b6f96534ecf47b5e68047e37a71a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 11 Jun 2024 12:09:08 +0000 Subject: [PATCH 4/4] add table variable annotation --- capa/features/extractors/dnfile/helpers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index a85e855d3..d7f4499ec 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -320,8 +320,11 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O if row_index - 1 <= 0: return None + table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index) + if table is None: + return None + try: - table = pe.net.mdtables.tables.get(table_index, []) return table[row_index - 1] except IndexError: return None