From a408629ae57572832edc653b1afc09cdf4f11683 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 11 Jun 2024 01:13:35 +0100 Subject: [PATCH 01/59] initial commit --- capa/features/address.py | 3 +- capa/features/common.py | 2 + capa/features/extractors/drakvuf/call.py | 56 ++++++++ capa/features/extractors/drakvuf/extractor.py | 98 ++++++++++++++ capa/features/extractors/drakvuf/file.py | 62 +++++++++ capa/features/extractors/drakvuf/global_.py | 38 ++++++ capa/features/extractors/drakvuf/helpers.py | 34 +++++ capa/features/extractors/drakvuf/models.py | 124 ++++++++++++++++++ capa/features/extractors/drakvuf/process.py | 38 ++++++ capa/features/extractors/drakvuf/thread.py | 24 ++++ capa/helpers.py | 57 +++++++- capa/ida/plugin/form.py | 6 +- capa/loader.py | 18 +++ capa/main.py | 27 +++- capa/render/result_document.py | 9 +- pyproject.toml | 3 +- tests/fixtures.py | 97 +++++++++++++- tests/test_cape_features.py | 4 +- tests/test_drakvuf_features.py | 27 ++++ 19 files changed, 707 insertions(+), 20 deletions(-) create mode 100644 capa/features/extractors/drakvuf/call.py create mode 100644 capa/features/extractors/drakvuf/extractor.py create mode 100644 capa/features/extractors/drakvuf/file.py create mode 100644 capa/features/extractors/drakvuf/global_.py create mode 100644 capa/features/extractors/drakvuf/helpers.py create mode 100644 capa/features/extractors/drakvuf/models.py create mode 100644 capa/features/extractors/drakvuf/process.py create mode 100644 capa/features/extractors/drakvuf/thread.py create mode 100644 tests/test_drakvuf_features.py diff --git a/capa/features/address.py b/capa/features/address.py index 45c3a600f..f0cbd77d2 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -10,7 +10,8 @@ class Address(abc.ABC): @abc.abstractmethod - def __eq__(self, other): ... + def __eq__(self, other): + ... 
@abc.abstractmethod def __lt__(self, other): diff --git a/capa/features/common.py b/capa/features/common.py index 4b02b5ced..28ddc59c6 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -458,6 +458,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): FORMAT_SC32 = "sc32" FORMAT_SC64 = "sc64" FORMAT_CAPE = "cape" +FORMAT_DRAKVUF = "drakvuf" FORMAT_FREEZE = "freeze" FORMAT_RESULT = "result" STATIC_FORMATS = { @@ -471,6 +472,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): } DYNAMIC_FORMATS = { FORMAT_CAPE, + FORMAT_DRAKVUF, FORMAT_FREEZE, FORMAT_RESULT, } diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py new file mode 100644 index 000000000..d979cae97 --- /dev/null +++ b/capa/features/extractors/drakvuf/call.py @@ -0,0 +1,56 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+ +import logging +from typing import Tuple, Iterator + +from capa.features.insn import API, Number +from capa.features.common import String, Feature +from capa.features.address import Address +from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle +from capa.features.extractors.drakvuf.models import Call + +logger = logging.getLogger(__name__) + + +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: + """ + this method extracts the given call's features (such as API name and arguments), + and returns them as API, Number, and String features. + + args: + ph: process handle (for defining the extraction scope) + th: thread handle (for defining the extraction scope) + ch: call handle (for defining the extraction scope) + + yields: + Feature, address; where Feature is either: API, Number, or String. + """ + call: Call = ch.inner + + # list similar to disassembly: arguments right-to-left, call + for arg_value in call.arguments.values(): + if arg_value.isdecimal(): + yield Number(int(arg_value)), ch.address + else: + try: + yield Number(int(arg_value, 16)), ch.address + except: + # yield it as a string + yield String(arg_value), ch.address + + yield API(call.name), ch.address + + +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: + for handler in CALL_HANDLERS: + for feature, addr in handler(ph, th, ch): + yield feature, addr + + +CALL_HANDLERS = (extract_call_features,) diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py new file mode 100644 index 000000000..577e5f989 --- /dev/null +++ b/capa/features/extractors/drakvuf/extractor.py @@ -0,0 +1,98 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import logging +from typing import Dict, List, Tuple, Union, Iterator + +import capa.features.extractors.drakvuf.call +import capa.features.extractors.drakvuf.file +import capa.features.extractors.drakvuf.thread +import capa.features.extractors.drakvuf.global_ +import capa.features.extractors.drakvuf.process +from capa.features.common import Feature, Characteristic +from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, AbsoluteVirtualAddress, _NoAddress +from capa.features.extractors.base_extractor import ( + CallHandle, + SampleHashes, + ThreadHandle, + ProcessHandle, + DynamicFeatureExtractor, +) +from capa.features.extractors.drakvuf.models import Call, DrakvufReport +from capa.features.extractors.drakvuf.helpers import sort_calls + +logger = logging.getLogger(__name__) + + +class DrakvufExtractor(DynamicFeatureExtractor): + def __init__(self, report: DrakvufReport): + super().__init__( + # DRAKVUF currently does not yield hash information about the sample in its output + hashes=SampleHashes(md5="", sha1="", sha256="") + ) + + self.report: DrakvufReport = report + + # sort the api calls to prevent going through the entire list each time + self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]] = sort_calls(report) + + # pre-compute these because we'll yield them at *every* scope. 
+ self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report)) + + def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: + # DRAKVUF currently does not yield information about the PE's address + return _NoAddress + + def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + yield from self.global_features + + def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + yield from capa.features.extractors.drakvuf.file.extract_features(self.report) + + def get_processes(self) -> Iterator[ProcessHandle]: + yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls) + + def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + yield from capa.features.extractors.drakvuf.process.extract_features(ph) + + def get_process_name(self, ph: ProcessHandle) -> str: + return ph.inner["process_name"] + + def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: + yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph) + + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + if False: + # force this routine to be a generator, + # but we don't actually have any elements to generate. 
+ yield Characteristic("never"), NO_ADDRESS + return + + def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]: + yield from capa.features.extractors.drakvuf.thread.get_calls(self.sorted_calls, ph, th) + + def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> str: + call: Call = ch.inner + call_name = "{}({}){}".format( + call.name, + ", ".join(f"{arg_name}={arg_value}" for arg_name, arg_value in call.arguments.items()), + f" -> {call.return_value}" + if hasattr(call, "return_value") + else "", # SysCalls don't have a return value, while WinApi calls do + ) + return call_name + + def extract_call_features( + self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle + ) -> Iterator[Tuple[Feature, Address]]: + yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch) + + @classmethod + def from_report(cls, report: List[Dict]) -> "DrakvufExtractor": + dr = DrakvufReport.from_raw_report(report) + return DrakvufExtractor(report=dr) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py new file mode 100644 index 000000000..016df11f0 --- /dev/null +++ b/capa/features/extractors/drakvuf/file.py @@ -0,0 +1,62 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+ +import logging +from typing import Dict, Tuple, Iterator + +from capa.features.file import Import +from capa.features.common import String, Feature +from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, AbsoluteVirtualAddress +from capa.features.extractors.helpers import generate_symbols +from capa.features.extractors.base_extractor import ProcessHandle +from capa.features.extractors.drakvuf.models import Call, DrakvufReport + +logger = logging.getLogger(__name__) + + +def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]]) -> Iterator[ProcessHandle]: + """ + get all the created processes for a sample + """ + for proc_addr, calls_per_thread in calls.items(): + sample_call: Call = next(iter(calls_per_thread.values()))[0] # get process name + yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name}) + + +def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + """ + extract imported function names + """ + if report.loaded_dlls is None: + return + dlls = report.loaded_dlls + + for dll in dlls: + dll_base_name = dll.name.split("\\")[-1] + for function_name, function_address in dll.imports.items(): + for name in generate_symbols(dll_base_name, function_name, include_dll=True): + yield Import(name), AbsoluteVirtualAddress(function_address) + + +def extract_file_strings(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + if report.discovered_dlls is None: + return + for dll in report.discovered_dlls: + yield String(dll.name), NO_ADDRESS + + +def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + for handler in FILE_HANDLERS: + for feature, addr in handler(report): + yield feature, addr + + +FILE_HANDLERS = ( + extract_import_names, + extract_file_strings, +) diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py new file mode 100644 index 000000000..3b30c8d44 --- /dev/null +++ 
b/capa/features/extractors/drakvuf/global_.py @@ -0,0 +1,38 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import logging +from typing import Tuple, Iterator + +from capa.features.common import OS, FORMAT_PE, OS_WINDOWS, Format, Feature +from capa.features.address import NO_ADDRESS, Address +from capa.features.extractors.drakvuf.models import DrakvufReport + +logger = logging.getLogger(__name__) + + +def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + # drakvuf sandbox currently supports only windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + yield Format(FORMAT_PE), NO_ADDRESS + + +def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + # drakvuf sandbox currently supports only windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + yield OS(OS_WINDOWS), NO_ADDRESS + + +def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + for global_handler in GLOBAL_HANDLER: + for feature, addr in global_handler(report): + yield feature, addr + + +GLOBAL_HANDLER = ( + extract_format, + extract_os, +) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py new file mode 100644 index 000000000..686595648 --- /dev/null +++ b/capa/features/extractors/drakvuf/helpers.py @@ -0,0 +1,34 @@ +# Copyright (C) 2023 Mandiant, Inc. 
All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from typing import Dict + +from capa.features.address import ThreadAddress, ProcessAddress +from capa.features.extractors.drakvuf.models import Call, DrakvufReport + + +def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, Call]]: + result = {} + for call in (*report.syscalls, *report.apicalls): + if call.pid < 1: + # ignore calls whose PID is less than 1 + continue + proc_addr = ProcessAddress(pid=call.pid, ppid=call.ppid) + thread_addr = ThreadAddress(process=proc_addr, tid=call.tid) + if proc_addr not in result: + result[proc_addr] = {} + if thread_addr not in result[proc_addr]: + result[proc_addr][thread_addr] = [] + + result[proc_addr][thread_addr].append(call) + + for proc, threads in result.items(): + for thread in threads: + result[proc][thread].sort(key=lambda call: call.timestamp) + + return result diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py new file mode 100644 index 000000000..b3fd4f6cb --- /dev/null +++ b/capa/features/extractors/drakvuf/models.py @@ -0,0 +1,124 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import logging +from typing import Any, Dict, List + +from pydantic import Field, BaseModel, ConfigDict, model_validator + +from capa.exceptions import EmptyReportError + +logger = logging.getLogger(__name__) + + +REQUIRED_SYSCALL_FIELD_NAMES = { + "Plugin", + "TimeStamp", + "PID", + "PPID", + "TID", + "UserName", + "UserId", + "ProcessName", + "Method", + "EventUID", + "Module", + "vCPU", + "CR3", + "Syscall", + "NArgs", +} + + +class ConciseModel(BaseModel): + ConfigDict(extra="ignore") + + +class DiscoveredDLL(ConciseModel): + plugin_name: str = Field(alias="Plugin") + event: str = Field(alias="Event") + name: str = Field(alias="DllName") + pid: int = Field(alias="PID") + + +class LoadedDLL(ConciseModel): + plugin_name: str = Field(alias="Plugin") + event: str = Field(alias="Event") + name: str = Field(alias="DllName") + imports: Dict[str, int] = Field(alias="Rva") + + +class Call(ConciseModel): + plugin_name: str = Field(alias="Plugin") + timestamp: str = Field(alias="TimeStamp") + process_name: str = Field(alias="ProcessName") + ppid: int = Field(alias="PPID") + pid: int = Field(alias="PID") + tid: int = Field(alias="TID") + name: str = Field(alias="Method") + arguments: Dict[str, str] + + +class WinApiCall(Call): + arguments: Dict[str, str] = Field(alias="Arguments") + event: str = Field(alias="Event") + return_value: str = Field(alias="ReturnValue") + + @model_validator(mode="before") + @classmethod + def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: + args = values["Arguments"] + values["Arguments"] = {name: val for name, val in map(lambda arg: arg.split("=", 
1), args)} + return values + + +class SystemCall(Call): + syscall_number: int = Field(alias="Syscall") + module: str = Field(alias="Module") + nargs: int = Field(alias="NArgs") + + @model_validator(mode="before") + @classmethod + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + # Drakvuf stores argument names and values as entries in the syscall's entry. + # This model validator collects those arguments into a dict in the model. + values["arguments"] = { + name: value for name, value in values.items() if name not in REQUIRED_SYSCALL_FIELD_NAMES + } + return values + + +class DrakvufReport(ConciseModel): + syscalls: List[SystemCall] = None + apicalls: List[WinApiCall] = None + discovered_dlls: List[DiscoveredDLL] = None + loaded_dlls: List[LoadedDLL] = None + + @model_validator(mode="after") + def validate_arguments(self) -> "DrakvufReport": + if any((self.syscalls, self.apicalls, self.discovered_dlls, self.loaded_dlls)) is False: + raise EmptyReportError("Report is empty") + return self + + @classmethod + def from_raw_report(cls, entries: List[Dict]) -> "DrakvufReport": + values = {"syscalls": [], "apicalls": [], "discovered_dlls": [], "loaded_dlls": []} + + for entry in entries: + plugin = entry.get("Plugin") + if plugin == "syscall": + values["syscalls"].append(SystemCall(**entry)) + elif plugin == "apimon": + event = entry.get("Event", None) + if event == "api_called": + values["apicalls"].append(WinApiCall(**entry)) + elif event == "dll_loaded": + values["loaded_dlls"].append(LoadedDLL(**entry)) + elif event == "dll_discovered": + values["discovered_dlls"].append(DiscoveredDLL(**entry)) + + return DrakvufReport(**values) diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py new file mode 100644 index 000000000..bbd6fee61 --- /dev/null +++ b/capa/features/extractors/drakvuf/process.py @@ -0,0 +1,38 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import logging +from typing import Dict, Tuple, Iterator + +from capa.features.common import String, Feature +from capa.features.address import Address, ThreadAddress, ProcessAddress +from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle +from capa.features.extractors.drakvuf.models import Call + +logger = logging.getLogger(__name__) + + +def get_threads(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]], ph: ProcessHandle) -> Iterator[ThreadHandle]: + """ + get the threads associated with a given process + """ + for thread_addr in calls[ph.address]: + yield ThreadHandle(address=thread_addr, inner={}) + + +def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + yield String(ph.inner["process_name"]), ph.address + + +def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + for handler in PROCESS_HANDLERS: + for feature, addr in handler(ph): + yield feature, addr + + +PROCESS_HANDLERS = (extract_process_name,) diff --git a/capa/features/extractors/drakvuf/thread.py b/capa/features/extractors/drakvuf/thread.py new file mode 100644 index 000000000..7d858be92 --- /dev/null +++ b/capa/features/extractors/drakvuf/thread.py @@ -0,0 +1,24 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import logging +from typing import Dict, Iterator + +from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress +from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle +from capa.features.extractors.drakvuf.models import Call + +logger = logging.getLogger(__name__) + + +def get_calls( + sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]], ph: ProcessHandle, th: ThreadHandle +) -> Iterator[CallHandle]: + for i, call in enumerate(sorted_calls[ph.address][th.address]): + call_addr = DynamicCallAddress(thread=th.address, id=i) + yield CallHandle(address=call_addr, inner=call) diff --git a/capa/helpers.py b/capa/helpers.py index 77380c7ed..0974b18c7 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -7,15 +7,15 @@ # See the License for the specific language governing permissions and limitations under the License. 
import sys import gzip -import json import inspect import logging import contextlib import importlib.util -from typing import NoReturn +from typing import Dict, Iterator, NoReturn from pathlib import Path import tqdm +from msgspec import json from capa.exceptions import UnsupportedFormatError from capa.features.common import ( @@ -25,13 +25,14 @@ FORMAT_SC64, FORMAT_DOTNET, FORMAT_FREEZE, + FORMAT_DRAKVUF, FORMAT_UNKNOWN, Format, ) EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") -EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz") +EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log") EXTENSIONS_ELF = "elf_" EXTENSIONS_FREEZE = "frz" @@ -76,13 +77,40 @@ def load_json_from_path(json_path: Path): try: report_json = compressed_report.read() except gzip.BadGzipFile: - report = json.load(json_path.open(encoding="utf-8")) + report = json.decode(json_path.read_text(encoding="utf-8")) else: - report = json.loads(report_json) + report = json.decode(report_json) return report +def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: + with open(jsonl_path, "rb") as f: + for line in f: + try: + line_s = line.strip().decode() + obj = json.decode(line_s) + yield obj + except: + # ignore erroneous lines + continue + + +def load_one_jsonl_from_path(jsonl_path: Path) -> str: + # this loads one json line to avoid the overhead of loading the entire file + with open(jsonl_path, "rb") as f: + line = next(iter(f)) + line = json.decode(line.decode(errors="ignore")) + return line + + def get_format_from_report(sample: Path) -> str: + if sample.name.endswith(".log"): + line = load_one_jsonl_from_path(sample) + if "Plugin" in line: + return FORMAT_DRAKVUF + else: + return FORMAT_UNKNOWN + report = load_json_from_path(sample) if "CAPE" in report: return FORMAT_CAPE @@ -189,6 +217,17 @@ def log_unsupported_cape_report_error(error: str): logger.error("-" * 80) +def log_unsupported_drakvuf_report_error(error: str): + logger.error("-" * 80) + 
logger.error(" Input file is not a valid DRAKVUF output file: %s", error) + logger.error(" ") + logger.error(" capa currently only supports analyzing standard DRAKVUF outputs in JSONL format.") + logger.error( + " Please make sure your report file is in the standard format and contains both the static and dynamic sections." + ) + logger.error("-" * 80) + + def log_empty_cape_report_error(error: str): logger.error("-" * 80) logger.error(" CAPE report is empty or only contains little useful data: %s", error) @@ -197,6 +236,14 @@ def log_empty_cape_report_error(error: str): logger.error("-" * 80) +def log_empty_drakvuf_report_error(error: str): + logger.error("-" * 80) + logger.error(" DRAKVUF report is empty or only contains little useful data: %s", error) + logger.error(" ") + logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.") + logger.error("-" * 80) + + def log_unsupported_os_error(): logger.error("-" * 80) logger.error(" Input file does not appear to target a supported OS.") diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 0aee6cea2..4cf612051 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -932,9 +932,9 @@ def get_ask_use_persistent_cache(self, analyze): update_wait_box("verifying cached results") try: - results: Optional[capa.render.result_document.ResultDocument] = ( - capa.ida.helpers.load_and_verify_cached_results() - ) + results: Optional[ + capa.render.result_document.ResultDocument + ] = capa.ida.helpers.load_and_verify_cached_results() except Exception as e: capa.ida.helpers.inform_user_ida_ui("Failed to verify cached results, reanalyzing program") logger.exception("Failed to verify cached results (error: %s)", e) diff --git a/capa/loader.py b/capa/loader.py index 8e91fae0f..c86b82bdb 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -44,6 +44,7 @@ FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, + FORMAT_DRAKVUF, ) from capa.features.address import Address from 
capa.features.extractors.base_extractor import ( @@ -60,6 +61,7 @@ BACKEND_BINJA = "binja" BACKEND_PEFILE = "pefile" BACKEND_CAPE = "cape" +BACKEND_DRAKVUF = "drakvuf" BACKEND_FREEZE = "freeze" @@ -186,6 +188,12 @@ def get_extractor( report = capa.helpers.load_json_from_path(input_path) return capa.features.extractors.cape.extractor.CapeExtractor.from_report(report) + elif backend == BACKEND_DRAKVUF: + import capa.features.extractors.drakvuf.extractor + + report = capa.helpers.load_jsonl_from_path(input_path) + return capa.features.extractors.drakvuf.extractor.DrakvufExtractor.from_report(report) + elif backend == BACKEND_DOTNET: import capa.features.extractors.dnfile.extractor @@ -303,6 +311,16 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr report = capa.helpers.load_json_from_path(input_file) file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)) + elif input_format == FORMAT_DRAKVUF: + import gc + + import capa.helpers + import capa.features.extractors.drakvuf.extractor + + report = capa.helpers.load_jsonl_from_path(input_file) + print(f"collected {gc.collect()} after jsonl") + file_extractors.append(capa.features.extractors.drakvuf.extractor.DrakvufExtractor.from_report(report)) + return file_extractors diff --git a/capa/main.py b/capa/main.py index eb43769d2..3ca78b1db 100644 --- a/capa/main.py +++ b/capa/main.py @@ -42,7 +42,15 @@ import capa.features.extractors.common from capa.rules import RuleSet from capa.engine import MatchResults -from capa.loader import BACKEND_VIV, BACKEND_CAPE, BACKEND_BINJA, BACKEND_DOTNET, BACKEND_FREEZE, BACKEND_PEFILE +from capa.loader import ( + BACKEND_VIV, + BACKEND_CAPE, + BACKEND_BINJA, + BACKEND_DOTNET, + BACKEND_FREEZE, + BACKEND_PEFILE, + BACKEND_DRAKVUF, +) from capa.helpers import ( get_file_taste, get_auto_format, @@ -50,7 +58,9 @@ log_unsupported_arch_error, log_empty_cape_report_error, log_unsupported_format_error, + 
log_empty_drakvuf_report_error, log_unsupported_cape_report_error, + log_unsupported_drakvuf_report_error, ) from capa.exceptions import ( EmptyReportError, @@ -73,6 +83,7 @@ FORMAT_DOTNET, FORMAT_FREEZE, FORMAT_RESULT, + FORMAT_DRAKVUF, ) from capa.capabilities.common import find_capabilities, has_file_limitation, find_file_capabilities from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor @@ -232,6 +243,7 @@ def install_common_args(parser, wanted=None): (FORMAT_SC32, "32-bit shellcode"), (FORMAT_SC64, "64-bit shellcode"), (FORMAT_CAPE, "CAPE sandbox report"), + (FORMAT_DRAKVUF, "DRAKVUF output"), (FORMAT_FREEZE, "features previously frozen by capa"), ] format_help = ", ".join([f"{f[0]}: {f[1]}" for f in formats]) @@ -253,6 +265,7 @@ def install_common_args(parser, wanted=None): (BACKEND_DOTNET, ".NET"), (BACKEND_FREEZE, "capa freeze"), (BACKEND_CAPE, "CAPE"), + (BACKEND_DRAKVUF, "DRAKVUF"), ] backend_help = ", ".join([f"{f[0]}: {f[1]}" for f in backends]) parser.add_argument( @@ -505,6 +518,9 @@ def get_backend_from_cli(args, input_format: str) -> str: if input_format == FORMAT_CAPE: return BACKEND_CAPE + if input_format == FORMAT_DRAKVUF: + return BACKEND_DRAKVUF + elif input_format == FORMAT_DOTNET: return BACKEND_DOTNET @@ -529,7 +545,7 @@ def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: raises: ShouldExitError: if the program is invoked incorrectly and should exit. 
""" - if backend == BACKEND_CAPE: + if backend in (BACKEND_CAPE, BACKEND_DRAKVUF): return None else: return args.input_file @@ -632,6 +648,8 @@ def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtract except UnsupportedFormatError as e: if input_format == FORMAT_CAPE: log_unsupported_cape_report_error(str(e)) + elif input_format == FORMAT_DRAKVUF: + log_unsupported_drakvuf_report_error(str(e)) else: log_unsupported_format_error() raise ShouldExitError(E_INVALID_FILE_TYPE) from e @@ -639,6 +657,9 @@ def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtract if input_format == FORMAT_CAPE: log_empty_cape_report_error(str(e)) raise ShouldExitError(E_EMPTY_REPORT) from e + elif input_format == FORMAT_DRAKVUF: + log_empty_drakvuf_report_error(str(e)) + raise ShouldExitError(E_EMPTY_REPORT) from e else: log_unsupported_format_error() raise ShouldExitError(E_INVALID_FILE_TYPE) from e @@ -744,6 +765,8 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr except UnsupportedFormatError as e: if input_format == FORMAT_CAPE: log_unsupported_cape_report_error(str(e)) + elif input_format == FORMAT_DRAKVUF: + log_unsupported_drakvuf_report_error(str(e)) else: log_unsupported_format_error() raise ShouldExitError(E_INVALID_FILE_TYPE) from e diff --git a/capa/render/result_document.py b/capa/render/result_document.py index d2de49d73..13e37fa98 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -160,7 +160,8 @@ class CompoundStatementType: OPTIONAL = "optional" -class StatementModel(FrozenModel): ... +class StatementModel(FrozenModel): + ... 
class CompoundStatement(StatementModel): @@ -649,9 +650,9 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( - collections.defaultdict(list) - ) + capabilities: Dict[ + str, List[Tuple[capa.features.address.Address, capa.features.common.Result]] + ] = collections.defaultdict(list) # this doesn't quite work because we don't have the rule source for rules that aren't matched. rules_by_name = { diff --git a/pyproject.toml b/pyproject.toml index 9adbff8d7..3aebc5e2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,8 @@ dependencies = [ "dncil==1.0.2", "pydantic==2.7.1", "protobuf==5.27.0", - "rich==13.4.2" + "rich==13.4.2", + "msgspec==0.18.6" ] dynamic = ["version"] diff --git a/tests/fixtures.py b/tests/fixtures.py index 531043861..ba68b4e1e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -199,6 +199,16 @@ def get_cape_extractor(path): return CapeExtractor.from_report(report) +@lru_cache(maxsize=1) +def get_drakvuf_extractor(path): + from capa.helpers import load_jsonl_from_path + from capa.features.extractors.drakvuf.extractor import DrakvufExtractor + + report = load_jsonl_from_path(path) + + return DrakvufExtractor.from_report(report) + + @lru_cache(maxsize=1) def get_ghidra_extractor(path: Path): import capa.features.extractors.ghidra.extractor @@ -385,6 +395,15 @@ def get_data_path_by_name(name) -> Path: / "v2.2" / "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz" ) + elif name.startswith("93b2d1"): + return ( + CD + / "data" + / "dynamic" + / "drakvuf" + / "93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795" + / "drakmon.log" + ) elif name.startswith("ea2876"): return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" elif 
name.startswith("1038a2"): @@ -678,7 +697,7 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -DYNAMIC_FEATURE_PRESENCE_TESTS = sorted( +DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted( [ # file/string ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True), @@ -717,7 +736,7 @@ def parametrize(params, values, **kwargs): key=lambda t: (t[0], t[1]), ) -DYNAMIC_FEATURE_COUNT_TESTS = sorted( +DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( [ # file/string ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1), @@ -756,6 +775,80 @@ def parametrize(params, values, **kwargs): key=lambda t: (t[0], t[1]), ) +DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted( + [ + # file/string + ( + "93b2d1", + "file", + capa.features.common.String( + "\\Program Files\\WindowsApps\\microsoft.windowscommunicationsapps_16005.11629.20316.0_x64__8wekyb3d8bbwe\\resources.pri" + ), + True, + ), + ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), + # file/imports + ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True), + # thread/api calls + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True), + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), + # call/api + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("LdrLoadDll"), True), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("DoesNotExist"), False), + # call/string argument + ( + "93b2d1", + "process=(3564:4852),thread=6592, call=1", + capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), + True, + ), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.common.String("non_existant"), False), + # call/number argument + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x801), 
True), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x010101010101), False), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + +DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted( + [ + # file/string + ( + "93b2d1", + "file", + capa.features.common.String( + "\\Program Files\\WindowsApps\\microsoft.windowscommunicationsapps_16005.11629.20316.0_x64__8wekyb3d8bbwe\\resources.pri" + ), + 1, + ), + ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), + # file/imports + ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1), + # thread/api calls + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), 9), + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), + # call/api + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("LdrLoadDll"), 1), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("DoesNotExist"), 0), + # call/string argument + ( + "93b2d1", + "process=(3564:4852),thread=6592, call=1", + capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), + 1, + ), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.common.String("non_existant"), 0), + # call/number argument + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x801), 1), + ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x010101010101), 0), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. 
+ key=lambda t: (t[0], t[1]), +) + FEATURE_PRESENCE_TESTS = sorted( [ # file/characteristic("embedded pe") diff --git a/tests/test_cape_features.py b/tests/test_cape_features.py index 6dc833c0a..769d88a39 100644 --- a/tests/test_cape_features.py +++ b/tests/test_cape_features.py @@ -11,7 +11,7 @@ @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_FEATURE_PRESENCE_TESTS, + fixtures.DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"], ) def test_cape_features(sample, scope, feature, expected): @@ -20,7 +20,7 @@ def test_cape_features(sample, scope, feature, expected): @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_FEATURE_COUNT_TESTS, + fixtures.DYNAMIC_CAPE_FEATURE_COUNT_TESTS, indirect=["sample", "scope"], ) def test_cape_feature_counts(sample, scope, feature, expected): diff --git a/tests/test_drakvuf_features.py b/tests/test_drakvuf_features.py new file mode 100644 index 000000000..57dc37b31 --- /dev/null +++ b/tests/test_drakvuf_features.py @@ -0,0 +1,27 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+ +import fixtures + + +@fixtures.parametrize( + "sample,scope,feature,expected", + fixtures.DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS, + indirect=["sample", "scope"], +) +def test_drakvuf_features(sample, scope, feature, expected): + fixtures.do_test_feature_presence(fixtures.get_drakvuf_extractor, sample, scope, feature, expected) + + +@fixtures.parametrize( + "sample,scope,feature,expected", + fixtures.DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS, + indirect=["sample", "scope"], +) +def test_drakvuf_feature_counts(sample, scope, feature, expected): + fixtures.do_test_feature_count(fixtures.get_drakvuf_extractor, sample, scope, feature, expected) From 603d623f4872981aeb744563087bad460b3a1d36 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 11 Jun 2024 01:34:41 +0100 Subject: [PATCH 02/59] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99f92a8bf..603a206d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz - replace Halo spinner with Rich #2086 @s-ff +- add a feature extractor for Drakvuf sandbox #2143 @yelhamer ### Breaking Changes From 1e8735a0d4b477035caa011915c1de015d09610a Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 11 Jun 2024 01:41:47 +0100 Subject: [PATCH 03/59] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 062cac69d..3e4f0c8eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz - render maec/* fields #843 @s-ff - replace Halo spinner with Rich #2086 @s-ff -- add a feature extractor for Drakvuf sandbox #2143 @yelhamer - optimize rule matching #2080 @williballenthin 
+- add a feature extractor for Drakvuf sandbox #2143 @yelhamer ### Breaking Changes From d2cdccf0279d4302b67485b38615afd3b79fc766 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 11 Jun 2024 01:44:45 +0100 Subject: [PATCH 04/59] Update pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1e69e4edd..b93430865 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,6 @@ dependencies = [ "rich==13.7.1", "humanize==4.9.0", "protobuf==5.27.0", - "rich==13.4.2", "msgspec==0.18.6" ] dynamic = ["version"] From 840f59fece9ff6ad57f82f090a579a5d4b54f27b Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Mon, 17 Jun 2024 01:46:44 +0100 Subject: [PATCH 05/59] Apply suggestions from code review: Typos Co-authored-by: Vasco Schiavo <115561717+VascoSch92@users.noreply.github.com> --- capa/features/extractors/drakvuf/call.py | 2 +- capa/features/extractors/drakvuf/file.py | 4 ++-- capa/features/extractors/drakvuf/process.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index d979cae97..bc6a26899 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -20,7 +20,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: """ - this method extracts the given call's features (such as API name and arguments), + This method extracts the given call's features (such as API name and arguments), and returns them as API, Number, and String features. 
args: diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 016df11f0..99bd5e967 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -21,7 +21,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]]) -> Iterator[ProcessHandle]: """ - get all the created processes for a sample + Get all the created processes for a sample. """ for proc_addr, calls_per_thread in calls.items(): sample_call: Call = next(iter(calls_per_thread.values()))[0] # get process name @@ -30,7 +30,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]]) -> Ite def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: """ - extract imported function names + Extract imported function names. """ if report.loaded_dlls is None: return diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py index bbd6fee61..a3ffca502 100644 --- a/capa/features/extractors/drakvuf/process.py +++ b/capa/features/extractors/drakvuf/process.py @@ -19,7 +19,7 @@ def get_threads(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]], ph: ProcessHandle) -> Iterator[ThreadHandle]: """ - get the threads associated with a given process + Get the threads associated with a given process. 
""" for thread_addr in calls[ph.address]: yield ThreadHandle(address=thread_addr, inner={}) From 9e133622ab94940724671ffcd4dd624bbef5cd8b Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Mon, 17 Jun 2024 01:50:13 +0100 Subject: [PATCH 06/59] capa/helpers.py: update if/else statement Co-authored-by: Vasco Schiavo <115561717+VascoSch92@users.noreply.github.com> --- capa/helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 0974b18c7..967103760 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -108,8 +108,7 @@ def get_format_from_report(sample: Path) -> str: line = load_one_jsonl_from_path(sample) if "Plugin" in line: return FORMAT_DRAKVUF - else: - return FORMAT_UNKNOWN + return FORMAT_UNKNOWN report = load_json_from_path(sample) if "CAPE" in report: From 2e408d8f250a45b64ec93e0c60e9a9af9210329b Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Wed, 19 Jun 2024 02:39:52 +0100 Subject: [PATCH 07/59] loader.py: replace print() statement with log.info() --- capa/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/loader.py b/capa/loader.py index c86b82bdb..56d9b8104 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -318,7 +318,7 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr import capa.features.extractors.drakvuf.extractor report = capa.helpers.load_jsonl_from_path(input_file) - print(f"collected {gc.collect()} after jsonl") + logger.info(f"collected {gc.collect()} objects after loading jsonl") file_extractors.append(capa.features.extractors.drakvuf.extractor.DrakvufExtractor.from_report(report)) return file_extractors From b28e0d08f92d69e140c897f338bea52928fef73d Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:30:44 +0100 Subject: [PATCH 08/59] Update capa/features/extractors/drakvuf/models.py 
Co-authored-by: Moritz --- capa/features/extractors/drakvuf/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index b3fd4f6cb..100dbcc13 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -84,7 +84,7 @@ class SystemCall(Call): @model_validator(mode="before") @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: - # Drakvuf stores argument names and vlaues as entries in the syscall's entry. + # Drakvuf stores argument names and values as entries in the syscall's entry. # This model validator collects those arguments into a list in the model. values["arguments"] = { name: value for name, value in values.items() if name not in REQUIRED_SYSCALL_FIELD_NAMES From c05b973dfc393d622fc9516470b10ca9410dd4e0 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 03:57:12 +0100 Subject: [PATCH 09/59] extractors/drakvuf/call.py: yield arguments right to left --- capa/features/extractors/drakvuf/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index bc6a26899..c1122e702 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -34,7 +34,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - call: Call = ch.inner # list similar to disassembly: arguments right-to-left, call - for arg_value in call.arguments.values(): + for arg_value in reversed(call.arguments.values()): if arg_value.isdecimal(): yield Number(int(arg_value)), ch.address else: From 70d03eb053761256b4211c73737a7d022ec5c86c Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 03:59:09 +0100 Subject: [PATCH 10/59] extractors/drakvuf/file.py: add a TODO comment for extracting more file features --- 
capa/features/extractors/drakvuf/file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 99bd5e967..c73eb81c5 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -57,6 +57,7 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] FILE_HANDLERS = ( + # TODO: extract more file features from other drakvuf plugins extract_import_names, extract_file_strings, ) From 8d4f3c769e75b780db078b2eacf9bdb296fada0b Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:06:14 +0100 Subject: [PATCH 11/59] extractors/drakvuf/global_.py: add arch extraction --- capa/features/extractors/drakvuf/global_.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index 3b30c8d44..e040f1dcb 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -9,7 +9,7 @@ import logging from typing import Tuple, Iterator -from capa.features.common import OS, FORMAT_PE, OS_WINDOWS, Format, Feature +from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature from capa.features.address import NO_ADDRESS, Address from capa.features.extractors.drakvuf.models import DrakvufReport @@ -17,15 +17,20 @@ def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - # drakvuf sandbox currently supports only windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + # drakvuf sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Format(FORMAT_PE), NO_ADDRESS def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - # drakvuf sandbox currently supports only windows as the 
guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + # drakvuf sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield OS(OS_WINDOWS), NO_ADDRESS +def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: + # drakvuf sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + yield Arch(ARCH_AMD64), NO_ADDRESS + + def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(report): @@ -35,4 +40,5 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] GLOBAL_HANDLER = ( extract_format, extract_os, + extract_arch, ) From bf12ce85337c2046db460edfb5c7ed2a7e2d9f46 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:08:21 +0100 Subject: [PATCH 12/59] extractors/drakvuf/helpers.py: ignore null pids --- capa/features/extractors/drakvuf/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 686595648..e8c4610e4 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -15,8 +15,8 @@ def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, Call]]: result = {} for call in (*report.syscalls, *report.apicalls): - if call.pid < 1: - # ignore pid's with zero + if call.pid == 0: + # ignore Drakvuf's null pids continue proc_addr = ProcessAddress(pid=call.pid, ppid=call.ppid) thread_addr = ThreadAddress(process=proc_addr, tid=call.tid) From 84d68a497450fdd3ec9adff1e6329e61c000b5b7 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:10:31 +0100 Subject: [PATCH 13/59] capa/helpers.py: mention msgspec.json explicitely --- 
capa/helpers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 967103760..4a6e3028e 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -15,7 +15,7 @@ from pathlib import Path import tqdm -from msgspec import json +import msgspec.json from capa.exceptions import UnsupportedFormatError from capa.features.common import ( @@ -77,9 +77,9 @@ def load_json_from_path(json_path: Path): try: report_json = compressed_report.read() except gzip.BadGzipFile: - report = json.decode(json_path.read_text(encoding="utf-8")) + report = msgspec.json.decode(json_path.read_text(encoding="utf-8")) else: - report = json.decode(report_json) + report = msgspec.json.decode(report_json) return report @@ -88,7 +88,7 @@ def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: for line in f: try: line_s = line.strip().decode() - obj = json.decode(line_s) + obj = msgspec.json.decode(line_s) yield obj except: # ignore erroneous lines @@ -99,7 +99,7 @@ def load_one_jsonl_from_path(jsonl_path: Path) -> str: # this loads one json line to avoid the overhead of loading the entire file with open(jsonl_path, "rb") as f: line = next(iter(f)) - line = json.decode(line.decode(errors="ignore")) + line = msgspec.json.decode(line.decode(errors="ignore")) return line From 00349d53f06973ae5d0617b3d34e3745ae6ee2cb Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:18:35 +0100 Subject: [PATCH 14/59] capa/helpers.py: generalize empty sandbox reports error logging --- capa/helpers.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 4a6e3028e..9df19425c 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -227,17 +227,9 @@ def log_unsupported_drakvuf_report_error(error: str): logger.error("-" * 80) -def log_empty_cape_report_error(error: str): +def log_empty_sandbox_report_error(error: str, sandbox_name: str = "Dynamic"): logger.error("-" * 80) - 
logger.error(" CAPE report is empty or only contains little useful data: %s", error) - logger.error(" ") - logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.") - logger.error("-" * 80) - - -def log_empty_drakvuf_report_error(error: str): - logger.error("-" * 80) - logger.error(" DRAKVUF report is empty or only contains little useful data: %s", error) + logger.error(f" {sandbox_name} report is empty or only contains little useful data: %s", error) logger.error(" ") logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.") logger.error("-" * 80) From 53439c78e89569fbdb80a622fa22c64ce3fc691c Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:20:23 +0100 Subject: [PATCH 15/59] capa/loader.py: log jsonl garbage collection into debug --- capa/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/loader.py b/capa/loader.py index bb314eb31..9e26baeec 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -331,7 +331,7 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr import capa.features.extractors.drakvuf.extractor report = capa.helpers.load_jsonl_from_path(input_file) - logger.info(f"collected {gc.collect()} objects after loading jsonl") + logger.debug(f"collected {gc.collect()} objects after loading jsonl") file_extractors.append(capa.features.extractors.drakvuf.extractor.DrakvufExtractor.from_report(report)) return file_extractors From 2663fa633cf7b53e347d2e4520ab17a1ad619330 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 04:27:25 +0100 Subject: [PATCH 16/59] features/extractors/drakvuf/models.py: add documentation for SystemCall class --- capa/features/extractors/drakvuf/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 100dbcc13..840384948 100644 --- 
a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -64,6 +64,7 @@ class Call(ConciseModel): class WinApiCall(Call): + # This class models Windows api calls captured by Drakvuf (DLLs, etc.). arguments: Dict[str, str] = Field(alias="Arguments") event: str = Field(alias="Event") return_value: str = Field(alias="ReturnValue") @@ -77,6 +78,7 @@ def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: class SystemCall(Call): + # This class models native Windows api calls captured by Drakvuf. syscall_number: int = Field(alias="Syscall") module: str = Field(alias="Module") nargs: int = Field(alias="NArgs") From 3bea6e79ca7f5fbf01388f14216810220a0dcb45 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 08:58:05 +0100 Subject: [PATCH 17/59] capa/main.py: fix erroneous imports --- capa/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/capa/main.py b/capa/main.py index 3ca78b1db..d5b4226f6 100644 --- a/capa/main.py +++ b/capa/main.py @@ -56,9 +56,8 @@ get_auto_format, log_unsupported_os_error, log_unsupported_arch_error, - log_empty_cape_report_error, log_unsupported_format_error, - log_empty_drakvuf_report_error, + log_empty_sandbox_report_error, log_unsupported_cape_report_error, log_unsupported_drakvuf_report_error, ) @@ -655,10 +654,10 @@ def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtract raise ShouldExitError(E_INVALID_FILE_TYPE) from e except EmptyReportError as e: if input_format == FORMAT_CAPE: - log_empty_cape_report_error(str(e)) + log_empty_sandbox_report_error(str(e), sandbox_name="CAPE") raise ShouldExitError(E_EMPTY_REPORT) from e elif input_format == FORMAT_DRAKVUF: - log_empty_drakvuf_report_error(str(e)) + log_empty_sandbox_report_error(str(e), sandbox_name="DRAKVUF") raise ShouldExitError(E_EMPTY_REPORT) from e else: log_unsupported_format_error() From 15a5efdd9608e2872c9f647bfd879563c935e26c Mon Sep 17 00:00:00 2001 From: 
Yacine Elhamer Date: Fri, 21 Jun 2024 09:12:31 +0100 Subject: [PATCH 18/59] drakvuf extractor: fixed faulty type annotations --- capa/features/extractors/drakvuf/extractor.py | 4 ++-- capa/features/extractors/drakvuf/file.py | 6 +++--- capa/features/extractors/drakvuf/helpers.py | 6 +++--- capa/features/extractors/drakvuf/models.py | 10 +++++----- capa/features/extractors/drakvuf/process.py | 6 ++++-- capa/features/extractors/drakvuf/thread.py | 4 ++-- capa/rules/__init__.py | 1 - 7 files changed, 19 insertions(+), 18 deletions(-) diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 577e5f989..142101d3f 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -39,14 +39,14 @@ def __init__(self, report: DrakvufReport): self.report: DrakvufReport = report # sort the api calls to prevent going through the entire list each time - self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]] = sort_calls(report) + self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = sort_calls(report) # pre-compute these because we'll yield them at *every* scope. self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report)) def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: # DRAKVUF currently does not yield information about the PE's address - return _NoAddress + return NO_ADDRESS def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: yield from self.global_features diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index c73eb81c5..b85edb2d7 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Dict, Tuple, Iterator +from typing import Dict, List, Tuple, Iterator from capa.features.file import Import from capa.features.common import String, Feature @@ -19,12 +19,12 @@ logger = logging.getLogger(__name__) -def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]]) -> Iterator[ProcessHandle]: +def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]: """ Get all the created processes for a sample. """ for proc_addr, calls_per_thread in calls.items(): - sample_call: Call = next(iter(calls_per_thread.values()))[0] # get process name + sample_call = next(iter(calls_per_thread.values()))[0] # get process name yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name}) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index e8c4610e4..29255cf74 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -6,14 +6,14 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Dict +from typing import Dict, List from capa.features.address import ThreadAddress, ProcessAddress from capa.features.extractors.drakvuf.models import Call, DrakvufReport -def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, Call]]: - result = {} +def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: + result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} for call in (*report.syscalls, *report.apicalls): if call.pid == 0: # ignore Drakvuf's null pids diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 840384948..660cb1f36 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -95,10 +95,10 @@ def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: class DrakvufReport(ConciseModel): - syscalls: List[SystemCall] = None - apicalls: List[WinApiCall] = None - discovered_dlls: List[DiscoveredDLL] = None - loaded_dlls: List[LoadedDLL] = None + syscalls: List[SystemCall] = [] + apicalls: List[WinApiCall] = [] + discovered_dlls: List[DiscoveredDLL] = [] + loaded_dlls: List[LoadedDLL] = [] @model_validator(mode="after") def validate_arguments(self) -> "DrakvufReport": @@ -108,7 +108,7 @@ def validate_arguments(self) -> "DrakvufReport": @classmethod def from_raw_report(cls, entries: List[Dict]) -> "DrakvufReport": - values = {"syscalls": [], "apicalls": [], "discovered_dlls": [], "loaded_dlls": []} + values: Dict[str, List] = {"syscalls": [], "apicalls": [], "discovered_dlls": [], "loaded_dlls": []} for entry in entries: plugin = entry.get("Plugin") diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py index a3ffca502..0a5feca42 100644 --- a/capa/features/extractors/drakvuf/process.py +++ b/capa/features/extractors/drakvuf/process.py @@ -7,7 +7,7 @@ # See the License for the specific language governing 
permissions and limitations under the License. import logging -from typing import Dict, Tuple, Iterator +from typing import Dict, List, Tuple, Iterator from capa.features.common import String, Feature from capa.features.address import Address, ThreadAddress, ProcessAddress @@ -17,7 +17,9 @@ logger = logging.getLogger(__name__) -def get_threads(calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]], ph: ProcessHandle) -> Iterator[ThreadHandle]: +def get_threads( + calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle +) -> Iterator[ThreadHandle]: """ Get the threads associated with a given process. """ diff --git a/capa/features/extractors/drakvuf/thread.py b/capa/features/extractors/drakvuf/thread.py index 7d858be92..a1532edcb 100644 --- a/capa/features/extractors/drakvuf/thread.py +++ b/capa/features/extractors/drakvuf/thread.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, Iterator +from typing import Dict, List, Iterator from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle @@ -17,7 +17,7 @@ def get_calls( - sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, Call]], ph: ProcessHandle, th: ThreadHandle + sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle ) -> Iterator[CallHandle]: for i, call in enumerate(sorted_calls[ph.address][th.address]): call_addr = DynamicCallAddress(thread=th.address, id=i) diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 3deff533f..e2f0a8102 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -1918,7 +1918,6 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea # This strategy is described here: # https://github.com/mandiant/capa/issues/2129 if 
feature_index.string_rules: - # This is a FeatureSet that contains only String features. # Since we'll only be evaluating String/Regex features below, we don't care about # other sorts of features (Mnemonic, Number, etc.) and therefore can save some time From 0c0c4d059a0ef03708b2ed0debeaeabfd8cf9e13 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 19:01:55 +0100 Subject: [PATCH 19/59] fix black formatting --- capa/features/address.py | 3 +-- capa/features/extractors/drakvuf/extractor.py | 6 +++--- capa/ida/plugin/form.py | 6 +++--- capa/render/result_document.py | 9 ++++----- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/capa/features/address.py b/capa/features/address.py index f0cbd77d2..45c3a600f 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -10,8 +10,7 @@ class Address(abc.ABC): @abc.abstractmethod - def __eq__(self, other): - ... + def __eq__(self, other): ... @abc.abstractmethod def __lt__(self, other): diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 142101d3f..7f964daf0 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -81,9 +81,9 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> call_name = "{}({}){}".format( call.name, ", ".join(f"{arg_name}={arg_value}" for arg_name, arg_value in call.arguments.items()), - f" -> {call.return_value}" - if hasattr(call, "return_value") - else "", # SysCalls don't have a return value, while WinApi calls do + ( + f" -> {call.return_value}" if hasattr(call, "return_value") else "" + ), # SysCalls don't have a return value, while WinApi calls do ) return call_name diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 4cf612051..0aee6cea2 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -932,9 +932,9 @@ def get_ask_use_persistent_cache(self, analyze): 
update_wait_box("verifying cached results") try: - results: Optional[ - capa.render.result_document.ResultDocument - ] = capa.ida.helpers.load_and_verify_cached_results() + results: Optional[capa.render.result_document.ResultDocument] = ( + capa.ida.helpers.load_and_verify_cached_results() + ) except Exception as e: capa.ida.helpers.inform_user_ida_ui("Failed to verify cached results, reanalyzing program") logger.exception("Failed to verify cached results (error: %s)", e) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 255d70ff9..975e37431 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -160,8 +160,7 @@ class CompoundStatementType: OPTIONAL = "optional" -class StatementModel(FrozenModel): - ... +class StatementModel(FrozenModel): ... class CompoundStatement(StatementModel): @@ -650,9 +649,9 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[ - str, List[Tuple[capa.features.address.Address, capa.features.common.Result]] - ] = collections.defaultdict(list) + capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( + collections.defaultdict(list) + ) # this doesn't quite work because we don't have the rule source for rules that aren't matched. 
rules_by_name = { From 04ae28023044b381600414e8d882d459e685b5c2 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 19:21:34 +0100 Subject: [PATCH 20/59] fix flake8 issues --- capa/features/extractors/drakvuf/call.py | 2 +- capa/features/extractors/drakvuf/extractor.py | 2 +- capa/features/extractors/drakvuf/file.py | 2 +- capa/features/extractors/drakvuf/models.py | 8 ++++---- capa/helpers.py | 10 +++++----- capa/loader.py | 3 --- 6 files changed, 12 insertions(+), 15 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index c1122e702..673df77c2 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -40,7 +40,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - else: try: yield Number(int(arg_value, 16)), ch.address - except: + except ValueError: # yield it as a string yield String(arg_value), ch.address diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 7f964daf0..755feccbb 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -93,6 +93,6 @@ def extract_call_features( yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch) @classmethod - def from_report(cls, report: List[Dict]) -> "DrakvufExtractor": + def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor": dr = DrakvufReport.from_raw_report(report) return DrakvufExtractor(report=dr) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index b85edb2d7..9d87732ee 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -57,7 +57,7 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] FILE_HANDLERS = ( - # TODO: extract more file features from other drakvuf plugins + # TODO(yelhamer): 
extract more file features from other drakvuf plugins extract_import_names, extract_file_strings, ) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 660cb1f36..efb049eef 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Iterator from pydantic import Field, BaseModel, ConfigDict, model_validator @@ -73,7 +73,7 @@ class WinApiCall(Call): @classmethod def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: args = values["Arguments"] - values["Arguments"] = {name: val for name, val in map(lambda arg: arg.split("=", 1), args)} + values["Arguments"] = {name: val for name, val in (arg.split("=", 1) for arg in args)} return values @@ -107,7 +107,7 @@ def validate_arguments(self) -> "DrakvufReport": return self @classmethod - def from_raw_report(cls, entries: List[Dict]) -> "DrakvufReport": + def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": values: Dict[str, List] = {"syscalls": [], "apicalls": [], "discovered_dlls": [], "loaded_dlls": []} for entry in entries: @@ -115,7 +115,7 @@ def from_raw_report(cls, entries: List[Dict]) -> "DrakvufReport": if plugin == "syscall": values["syscalls"].append(SystemCall(**entry)) elif plugin == "apimon": - event = entry.get("Event", None) + event = entry.get("Event") if event == "api_called": values["apicalls"].append(WinApiCall(**entry)) elif event == "dll_loaded": diff --git a/capa/helpers.py b/capa/helpers.py index 9df19425c..c3e17e3c4 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -84,20 +84,20 @@ def load_json_from_path(json_path: Path): def load_jsonl_from_path(jsonl_path: 
Path) -> Iterator[Dict]: - with open(jsonl_path, "rb") as f: + with jsonl_path.open(mode="rb") as f: for line in f: try: line_s = line.strip().decode() obj = msgspec.json.decode(line_s) yield obj - except: + except ValueError: # ignore erroneous lines continue -def load_one_jsonl_from_path(jsonl_path: Path) -> str: +def load_one_jsonl_from_path(jsonl_path: Path): # this loads one json line to avoid the overhead of loading the entire file - with open(jsonl_path, "rb") as f: + with jsonl_path.open(mode="rb") as f: line = next(iter(f)) line = msgspec.json.decode(line.decode(errors="ignore")) return line @@ -229,7 +229,7 @@ def log_unsupported_drakvuf_report_error(error: str): def log_empty_sandbox_report_error(error: str, sandbox_name: str = "Dynamic"): logger.error("-" * 80) - logger.error(f" {sandbox_name} report is empty or only contains little useful data: %s", error) + logger.error(" %s report is empty or only contains little useful data: %s", sandbox_name, error) logger.error(" ") logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.") logger.error("-" * 80) diff --git a/capa/loader.py b/capa/loader.py index 9e26baeec..d2e3abe3d 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -325,13 +325,10 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)) elif input_format == FORMAT_DRAKVUF: - import gc - import capa.helpers import capa.features.extractors.drakvuf.extractor report = capa.helpers.load_jsonl_from_path(input_file) - logger.debug(f"collected {gc.collect()} objects after loading jsonl") file_extractors.append(capa.features.extractors.drakvuf.extractor.DrakvufExtractor.from_report(report)) return file_extractors From e54f38f73306f751c71c2ff52215c40a5aea3ea3 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 19:35:26 +0100 Subject: [PATCH 21/59] drakvuf file extraction: add link 
to tracking issue --- capa/features/extractors/drakvuf/file.py | 1 + capa/features/extractors/drakvuf/models.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 9d87732ee..12785623e 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -58,6 +58,7 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] FILE_HANDLERS = ( # TODO(yelhamer): extract more file features from other drakvuf plugins + # https://github.com/mandiant/capa/issues/2163 extract_import_names, extract_file_strings, ) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index efb049eef..0f33254ec 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -73,7 +73,7 @@ class WinApiCall(Call): @classmethod def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: args = values["Arguments"] - values["Arguments"] = {name: val for name, val in (arg.split("=", 1) for arg in args)} + values["Arguments"] = dict(arg.split("=", 1) for arg in args) return values From cb7babc5e56bb4500da15019bf3fb10d8608cf46 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 21:24:41 +0100 Subject: [PATCH 22/59] drakvuf reports: add the ability to read gzip-compressed report files --- capa/helpers.py | 13 ++++++++++--- tests/fixtures.py | 3 +-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index c3e17e3c4..7ea363ef4 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -32,7 +32,7 @@ EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") -EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log") +EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz") EXTENSIONS_ELF = "elf_" EXTENSIONS_FREEZE = "frz" @@ -84,7 +84,7 @@ def 
load_json_from_path(json_path: Path): def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: - with jsonl_path.open(mode="rb") as f: + def decode_json_lines(fd): for line in f: try: line_s = line.strip().decode() @@ -94,6 +94,13 @@ def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: # ignore erroneous lines continue + try: + with gzip.open(jsonl_path, "rb") as f: + yield from decode_json_lines(f) + except gzip.BadGzipFile: + with jsonl_path.open(mode="rb") as f: + yield from decode_json_lines(f) + def load_one_jsonl_from_path(jsonl_path: Path): # this loads one json line to avoid the overhead of loading the entire file @@ -104,7 +111,7 @@ def load_one_jsonl_from_path(jsonl_path: Path): def get_format_from_report(sample: Path) -> str: - if sample.name.endswith(".log"): + if sample.name.endswith((".log", "log.gz")): line = load_one_jsonl_from_path(sample) if "Plugin" in line: return FORMAT_DRAKVUF diff --git a/tests/fixtures.py b/tests/fixtures.py index 389d33497..b98244d9a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -401,8 +401,7 @@ def get_data_path_by_name(name) -> Path: / "data" / "dynamic" / "drakvuf" - / "93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795" - / "drakmon.log" + / "93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795.log.gz" ) elif name.startswith("ea2876"): return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" From 5284ec04dc45ce7968dfe8759b0430e1a8ed2588 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 21 Jun 2024 21:50:27 +0100 Subject: [PATCH 23/59] capa/helpers.py: fix mypy issues --- capa/helpers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 7ea363ef4..ba5bbd1c1 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -11,7 +11,7 @@ import logging import contextlib import importlib.util -from typing import Dict, Iterator, NoReturn +from typing import Dict, Union, 
BinaryIO, Iterator, NoReturn from pathlib import Path import tqdm @@ -84,8 +84,8 @@ def load_json_from_path(json_path: Path): def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: - def decode_json_lines(fd): - for line in f: + def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): + for line in fd: try: line_s = line.strip().decode() obj = msgspec.json.decode(line_s) @@ -95,8 +95,8 @@ def decode_json_lines(fd): continue try: - with gzip.open(jsonl_path, "rb") as f: - yield from decode_json_lines(f) + with gzip.open(jsonl_path, "rb") as fg: + yield from decode_json_lines(fg) except gzip.BadGzipFile: with jsonl_path.open(mode="rb") as f: yield from decode_json_lines(f) From 21d50e0674e4b58c2b8e8a6c2ee94cd8f6aced65 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 25 Jun 2024 06:20:44 +0100 Subject: [PATCH 24/59] apply review comments --- capa/features/extractors/drakvuf/call.py | 14 ++++++-------- capa/features/extractors/drakvuf/file.py | 2 +- capa/features/extractors/drakvuf/models.py | 17 +++++++++++++++++ capa/helpers.py | 11 ++++++++++- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 673df77c2..70ad70483 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -9,6 +9,7 @@ import logging from typing import Tuple, Iterator +from capa.helpers import str_to_number from capa.features.insn import API, Number from capa.features.common import String, Feature from capa.features.address import Address @@ -35,14 +36,11 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - # list similar to disassembly: arguments right-to-left, call for arg_value in reversed(call.arguments.values()): - if arg_value.isdecimal(): - yield Number(int(arg_value)), ch.address - else: - try: - yield Number(int(arg_value, 16)), ch.address - except ValueError: - # yield it as a string - yield 
String(arg_value), ch.address + try: + yield Number(str_to_number(arg_value)), ch.address + except ValueError: + # yield argument as a string + yield String(arg_value), ch.address yield API(call.name), ch.address diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 12785623e..567091d0a 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -58,7 +58,7 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] FILE_HANDLERS = ( # TODO(yelhamer): extract more file features from other drakvuf plugins - # https://github.com/mandiant/capa/issues/2163 + # https://github.com/mandiant/capa/issues/2169 extract_import_names, extract_file_strings, ) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 0f33254ec..1f51676bf 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -79,6 +79,23 @@ def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: class SystemCall(Call): # This class models native Windows api calls captured by Drakvuf. + # Schema: { + # "Plugin": "syscall", + # "TimeStamp": "1716999134.582553", + # "PID": 3888, "PPID": 2852, "TID": 368, "UserName": "SessionID", "UserId": 2, + # "ProcessName": "\\Device\\HarddiskVolume2\\Windows\\explorer.exe", + # "Method": "NtSetIoCompletionEx", + # "EventUID": "0x27", + # "Module": "nt", + # "vCPU": 0, + # "CR3": "0x119b1002", + # "Syscall": 419, + # "NArgs": 6, + # "IoCompletionHandle": "0xffffffff80001ac0", "IoCompletionReserveHandle": "0xffffffff8000188c", + # "KeyContext": "0x0", "ApcContext": "0x2", "IoStatus": "0x7ffb00000000", "IoStatusInformation": "0x0" + # } + # The keys up until "NArgs" are common to all the native calls that Drakvuf reports, with + # the remaining keys representing the call's specific arguments. 
syscall_number: int = Field(alias="Syscall") module: str = Field(alias="Module") nargs: int = Field(alias="NArgs") diff --git a/capa/helpers.py b/capa/helpers.py index ba5bbd1c1..ba06d3e05 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -47,6 +47,15 @@ def hex(n: int) -> str: return f"0x{(n):X}" +def str_to_number(s: str) -> int: + if s.isdecimal(): + return int(s) + try: + return int(s, 16) + except ValueError: + raise ValueError(f"{s} is not a valid number.") + + def get_file_taste(sample_path: Path) -> bytes: if not sample_path.exists(): raise IOError(f"sample path {sample_path} does not exist or cannot be accessed") @@ -234,7 +243,7 @@ def log_unsupported_drakvuf_report_error(error: str): logger.error("-" * 80) -def log_empty_sandbox_report_error(error: str, sandbox_name: str = "Dynamic"): +def log_empty_sandbox_report_error(error: str, sandbox_name: str): logger.error("-" * 80) logger.error(" %s report is empty or only contains little useful data: %s", sandbox_name, error) logger.error(" ") From 885f216d39c492333b0806a1dc4743ec64ba80f7 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:28:07 +0100 Subject: [PATCH 25/59] drakvuf/helpers.py: add more information about null pid --- capa/features/extractors/drakvuf/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 29255cf74..4f8c721e9 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -16,7 +16,9 @@ def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} for call in (*report.syscalls, *report.apicalls): if call.pid == 0: - # ignore Drakvuf's null pids + # Drakvuf captures api/native calls from all processes running on the system. 
+ # we ignore the pid 0 since it's a system process and it's unlikely for it to + # be hijacked or so on, in addition to capa addresses not supporting null pids continue proc_addr = ProcessAddress(pid=call.pid, ppid=call.ppid) thread_addr = ThreadAddress(process=proc_addr, tid=call.tid) From 3b2b02238fccf5407fdb10b3e02efae6cc4b0fde Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:32:45 +0100 Subject: [PATCH 26/59] drakvuf/file.py: remove discovered_dlls file strings extraction --- capa/features/extractors/drakvuf/file.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 567091d0a..b13e8dd0c 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -10,8 +10,8 @@ from typing import Dict, List, Tuple, Iterator from capa.features.file import Import -from capa.features.common import String, Feature -from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, AbsoluteVirtualAddress +from capa.features.common import Feature +from capa.features.address import Address, ThreadAddress, ProcessAddress, AbsoluteVirtualAddress from capa.features.extractors.helpers import generate_symbols from capa.features.extractors.base_extractor import ProcessHandle from capa.features.extractors.drakvuf.models import Call, DrakvufReport @@ -43,13 +43,6 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre yield Import(name), AbsoluteVirtualAddress(function_address) -def extract_file_strings(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - if report.discovered_dlls is None: - return - for dll in report.discovered_dlls: - yield String(dll.name), NO_ADDRESS - - def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(report): @@ -60,5 +53,4 @@ def 
extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] # TODO(yelhamer): extract more file features from other drakvuf plugins # https://github.com/mandiant/capa/issues/2169 extract_import_names, - extract_file_strings, ) From 1e4ed1280afa3bee718e9e80fdb963fdaee686c2 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:33:30 +0100 Subject: [PATCH 27/59] capa/helpers.py: add comments for the dynamic extensions --- capa/helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/helpers.py b/capa/helpers.py index ba06d3e05..8fb1ab53e 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -32,6 +32,8 @@ EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") +# CAPE extensions: .json, .json_, .json.gz +# Drakvuf Sandbox extensions: .log, .log.gz EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz") EXTENSIONS_ELF = "elf_" EXTENSIONS_FREEZE = "frz" From b7f40586f46a5182fcb646674deb26397b87e192 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:40:47 +0100 Subject: [PATCH 28/59] capa/helpers.py: log bad lines --- capa/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 8fb1ab53e..2eb3ec7cc 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -101,9 +101,9 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): line_s = line.strip().decode() obj = msgspec.json.decode(line_s) yield obj - except ValueError: - # ignore erroneous lines - continue + except (msgspec.DecodeError, UnicodeDecodeError): + # sometimes Drakvuf reports bad method names and/or malformed JSON + logger.debug("bad drakvuf log line: %s", line) try: with gzip.open(jsonl_path, "rb") as fg: From 0f1750c0f9ca5699c1a7fd50a8094bf319f770d2 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:45:02 +0100 Subject: [PATCH 29/59] capa/helpers.py: add gzip support for reading one jsonl line --- 
capa/helpers.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 2eb3ec7cc..f47ed9fc0 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -94,17 +94,18 @@ def load_json_from_path(json_path: Path): return report -def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: - def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): - for line in fd: - try: - line_s = line.strip().decode() - obj = msgspec.json.decode(line_s) - yield obj - except (msgspec.DecodeError, UnicodeDecodeError): - # sometimes Drakvuf reports bad method names and/or malformed JSON - logger.debug("bad drakvuf log line: %s", line) +def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): + for line in fd: + try: + line_s = line.strip().decode() + obj = msgspec.json.decode(line_s) + yield obj + except (msgspec.DecodeError, UnicodeDecodeError): + # sometimes Drakvuf reports bad method names and/or malformed JSON + logger.debug("bad drakvuf log line: %s", line) + +def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: try: with gzip.open(jsonl_path, "rb") as fg: yield from decode_json_lines(fg) @@ -115,10 +116,15 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): def load_one_jsonl_from_path(jsonl_path: Path): # this loads one json line to avoid the overhead of loading the entire file - with jsonl_path.open(mode="rb") as f: - line = next(iter(f)) + try: + with gzip.open(jsonl_path, "rb") as f: + line = next(iter(f)) + except gzip.BadGzipFile: + with jsonl_path.open(mode="rb") as f: + line = next(iter(f)) + finally: line = msgspec.json.decode(line.decode(errors="ignore")) - return line + return line def get_format_from_report(sample: Path) -> str: From 4749f247e6c6a0f597ee36ccb838a25c9fc9d389 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 11:57:11 +0100 Subject: [PATCH 30/59] drakvuf/helpers.py: add comment for sort_calls() --- 
capa/features/extractors/drakvuf/helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 4f8c721e9..e2fa3f27e 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -13,6 +13,8 @@ def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: + # this method organizes calls into processes and threads, and then sorts them based on + # timestamp so that we can address individual calls per index (CallAddress requires call index) result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} for call in (*report.syscalls, *report.apicalls): if call.pid == 0: From 37f82cbeebc109f2a1643a4f0b3fd9337803adcc Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 12:01:17 +0100 Subject: [PATCH 31/59] tests/fixtures.py: add TODO for unifying CAPE and Drakvuf tests --- tests/fixtures.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index b98244d9a..e99dc8356 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -738,6 +738,8 @@ def parametrize(params, values, **kwargs): ) DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( + # TODO(yelhamer): use the same sample for testing CAPE and Drakvuf extractors + # https://github.com/mandiant/capa/issues/2180 [ # file/string ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1), From c45aaa0e9f76c8ac704abf8c058003376eb7a8f8 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 12:04:26 +0100 Subject: [PATCH 32/59] drakvuf/models.py: add TODO comment for supporting more drakvuf plugins --- capa/features/extractors/drakvuf/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 1f51676bf..5acb37b15 100644 --- a/capa/features/extractors/drakvuf/models.py +++ 
b/capa/features/extractors/drakvuf/models.py @@ -129,6 +129,8 @@ def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": for entry in entries: plugin = entry.get("Plugin") + # TODO(yelhamer): add support for more drakvuf plugins + # https://github.com/mandiant/capa/issues/2181 if plugin == "syscall": values["syscalls"].append(SystemCall(**entry)) elif plugin == "apimon": From aeea39bba7ef4c718db51a7cc08d9001deb7f57d Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 27 Jun 2024 12:07:35 +0100 Subject: [PATCH 33/59] tests/fixtures.py: remove obsolete file strings tests --- capa/helpers.py | 2 +- tests/fixtures.py | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index f47ed9fc0..65bc43d3d 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -124,7 +124,7 @@ def load_one_jsonl_from_path(jsonl_path: Path): line = next(iter(f)) finally: line = msgspec.json.decode(line.decode(errors="ignore")) - return line + return line def get_format_from_report(sample: Path) -> str: diff --git a/tests/fixtures.py b/tests/fixtures.py index e99dc8356..73ddfc59c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -780,15 +780,6 @@ def parametrize(params, values, **kwargs): DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted( [ - # file/string - ( - "93b2d1", - "file", - capa.features.common.String( - "\\Program Files\\WindowsApps\\microsoft.windowscommunicationsapps_16005.11629.20316.0_x64__8wekyb3d8bbwe\\resources.pri" - ), - True, - ), ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), # file/imports ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True), @@ -817,15 +808,6 @@ def parametrize(params, values, **kwargs): DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted( [ - # file/string - ( - "93b2d1", - "file", - capa.features.common.String( - "\\Program 
Files\\WindowsApps\\microsoft.windowscommunicationsapps_16005.11629.20316.0_x64__8wekyb3d8bbwe\\resources.pri" - ), - 1, - ), ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), # file/imports ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1), From c862f1205e24029f41603d0cc70f82cf4946087f Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:46:56 +0100 Subject: [PATCH 34/59] Update capa/main.py Co-authored-by: Willi Ballenthin --- capa/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index d5b4226f6..afd0e6855 100644 --- a/capa/main.py +++ b/capa/main.py @@ -242,7 +242,7 @@ def install_common_args(parser, wanted=None): (FORMAT_SC32, "32-bit shellcode"), (FORMAT_SC64, "64-bit shellcode"), (FORMAT_CAPE, "CAPE sandbox report"), - (FORMAT_DRAKVUF, "DRAKVUF output"), + (FORMAT_DRAKVUF, "DRAKVUF sandbox report"), (FORMAT_FREEZE, "features previously frozen by capa"), ] format_help = ", ".join([f"{f[0]}: {f[1]}" for f in formats]) From cea64d38d52843892d69b6f185c79cb079fc7faa Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:47:05 +0100 Subject: [PATCH 35/59] Update capa/features/extractors/drakvuf/models.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 5acb37b15..413cab82c 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -78,7 +78,7 @@ def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: class SystemCall(Call): - # This class models native Windows api calls captured by Drakvuf. + # This class models native Windows API calls captured by DRAKVUF. 
# Schema: { # "Plugin": "syscall", # "TimeStamp": "1716999134.582553", From 718d6ff624ab20e1bbf37c7f19545ee1d88378b3 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:07:48 +0100 Subject: [PATCH 36/59] Update capa/features/extractors/drakvuf/models.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 413cab82c..336369927 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -119,7 +119,7 @@ class DrakvufReport(ConciseModel): @model_validator(mode="after") def validate_arguments(self) -> "DrakvufReport": - if any((self.syscalls, self.apicalls, self.discovered_dlls, self.loaded_dlls)) is False: + if not any((self.syscalls, self.apicalls, self.discovered_dlls, self.loaded_dlls)): raise EmptyReportError("Report is empty") return self From 32c7a53d711e03e5609be2e187f809d1bfd57762 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:08:14 +0100 Subject: [PATCH 37/59] Update capa/features/extractors/drakvuf/call.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 70ad70483..b46947698 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: [package root]/LICENSE.txt From 7248c0a7712ad04efef3474fd178adf3ecf79716 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:09:14 +0100 Subject: [PATCH 38/59] Update CHANGELOG.md Co-authored-by: Willi Ballenthin --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa03f30ea..eced14fac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## master (unreleased) ### New Features -- add a feature extractor for Drakvuf sandbox #2143 @yelhamer +- support analyzing DRAKVUF sandbox traces #2143 @yelhamer ### Breaking Changes From de43d1e52c0cfcb30d01aeaa7e876c484e7af9c6 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:09:45 +0100 Subject: [PATCH 39/59] Update capa/features/extractors/drakvuf/helpers.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index e2fa3f27e..6264c8c2e 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -16,7 +16,7 @@ def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress # this method organizes calls into processes and threads, and then sorts them based on # timestamp so that we can address individual calls per index (CallAddress requires call index) result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} - for call in (*report.syscalls, *report.apicalls): + for call in itertools.chain(report.syscalls, report.apicalls): if call.pid == 0: # Drakvuf captures api/native calls from all processes running on the system. 
# we ignore the pid 0 since it's a system process and it's unlikely for it to From 3cd5cde9371b0ab91c949333ea0d5f81882cfcc9 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 16 Jul 2024 14:39:13 +0100 Subject: [PATCH 40/59] review comments --- capa/features/address.py | 3 +- capa/features/extractors/drakvuf/extractor.py | 6 +- capa/features/extractors/drakvuf/file.py | 2 +- capa/features/extractors/drakvuf/global_.py | 2 +- capa/features/extractors/drakvuf/helpers.py | 5 +- capa/features/extractors/drakvuf/models.py | 2 +- capa/features/extractors/drakvuf/process.py | 2 +- capa/features/extractors/drakvuf/thread.py | 2 +- capa/helpers.py | 2 +- capa/ida/plugin/form.py | 6 +- capa/render/result_document.py | 9 +- tests/fixtures.py | 136 ------------------ tests/test_cape_features.py | 90 +++++++++++- tests/test_drakvuf_features.py | 65 ++++++++- 14 files changed, 173 insertions(+), 159 deletions(-) diff --git a/capa/features/address.py b/capa/features/address.py index 45c3a600f..f0cbd77d2 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -10,7 +10,8 @@ class Address(abc.ABC): @abc.abstractmethod - def __eq__(self, other): ... + def __eq__(self, other): + ... @abc.abstractmethod def __lt__(self, other): diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 755feccbb..414527880 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: [package root]/LICENSE.txt @@ -24,7 +24,7 @@ DynamicFeatureExtractor, ) from capa.features.extractors.drakvuf.models import Call, DrakvufReport -from capa.features.extractors.drakvuf.helpers import sort_calls +from capa.features.extractors.drakvuf.helpers import index_calls logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ def __init__(self, report: DrakvufReport): self.report: DrakvufReport = report # sort the api calls to prevent going through the entire list each time - self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = sort_calls(report) + self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report) # pre-compute these because we'll yield them at *every* scope. self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report)) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index b13e8dd0c..51a4455a3 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: [package root]/LICENSE.txt diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index e040f1dcb..bd341f5b9 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: [package root]/LICENSE.txt diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 6264c8c2e..26493b70e 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: [package root]/LICENSE.txt @@ -6,13 +6,14 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +import itertools from typing import Dict, List from capa.features.address import ThreadAddress, ProcessAddress from capa.features.extractors.drakvuf.models import Call, DrakvufReport -def sort_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: +def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: # this method organizes calls into processes and threads, and then sorts them based on # timestamp so that we can address individual calls per index (CallAddress requires call index) result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 336369927..59affa6ef 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: [package root]/LICENSE.txt diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py index 0a5feca42..8b0819264 100644 --- a/capa/features/extractors/drakvuf/process.py +++ b/capa/features/extractors/drakvuf/process.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: [package root]/LICENSE.txt diff --git a/capa/features/extractors/drakvuf/thread.py b/capa/features/extractors/drakvuf/thread.py index a1532edcb..5e72b51ab 100644 --- a/capa/features/extractors/drakvuf/thread.py +++ b/capa/features/extractors/drakvuf/thread.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: [package root]/LICENSE.txt diff --git a/capa/helpers.py b/capa/helpers.py index 65bc43d3d..57c09bcc6 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -102,7 +102,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): yield obj except (msgspec.DecodeError, UnicodeDecodeError): # sometimes Drakvuf reports bad method names and/or malformed JSON - logger.debug("bad drakvuf log line: %s", line) + logger.debug("bad DRAKVUF log line: %s", line) def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 0aee6cea2..4cf612051 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -932,9 +932,9 @@ def get_ask_use_persistent_cache(self, analyze): update_wait_box("verifying cached results") try: - results: Optional[capa.render.result_document.ResultDocument] = ( - capa.ida.helpers.load_and_verify_cached_results() - ) + results: Optional[ + capa.render.result_document.ResultDocument + ] = capa.ida.helpers.load_and_verify_cached_results() except Exception as e: capa.ida.helpers.inform_user_ida_ui("Failed to verify cached results, reanalyzing program") logger.exception("Failed to verify cached results (error: %s)", e) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 975e37431..255d70ff9 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -160,7 +160,8 @@ class CompoundStatementType: OPTIONAL = "optional" -class StatementModel(FrozenModel): ... +class StatementModel(FrozenModel): + ... 
class CompoundStatement(StatementModel): @@ -649,9 +650,9 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( - collections.defaultdict(list) - ) + capabilities: Dict[ + str, List[Tuple[capa.features.address.Address, capa.features.common.Result]] + ] = collections.defaultdict(list) # this doesn't quite work because we don't have the rule source for rules that aren't matched. rules_by_name = { diff --git a/tests/fixtures.py b/tests/fixtures.py index 73ddfc59c..286eaaef8 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -698,142 +698,6 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted( - [ - # file/string - ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True), - ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True), - ("0000a657", "file", capa.features.common.String("nope"), False), - # file/sections - ("0000a657", "file", capa.features.file.Section(".rdata"), True), - ("0000a657", "file", capa.features.file.Section(".nope"), False), - # file/imports - ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True), - ("0000a657", "file", capa.features.file.Import("Nope"), False), - # file/exports - ("0000a657", "file", capa.features.file.Export("Nope"), False), - # process/environment variables - ( - "0000a657", - "process=(1180:3052)", - capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), - True, - ), - ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False), - # thread/api calls - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True), - 
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False), - # thread/number call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False), - # thread/string call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False), - ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), True), - ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), False), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), -) - -DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( - # TODO(yelhamer): use the same sample for testing CAPE and Drakvuf extractors - # https://github.com/mandiant/capa/issues/2180 - [ - # file/string - ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1), - ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1), - ("0000a657", "file", capa.features.common.String("nope"), 0), - # file/sections - ("0000a657", "file", capa.features.file.Section(".rdata"), 1), - ("0000a657", "file", capa.features.file.Section(".nope"), 0), - # file/imports - ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1), - ("0000a657", "file", capa.features.file.Import("Nope"), 0), - # file/exports - ("0000a657", "file", capa.features.file.Export("Nope"), 0), - # process/environment variables - ( - "0000a657", - "process=(1180:3052)", - capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), - 2, - ), - ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0), - # thread/api calls - ("0000a657", 
"process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0), - # thread/number call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0), - # thread/string call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0), - ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), 1), - ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), 0), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), -) - -DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted( - [ - ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), - # file/imports - ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True), - # thread/api calls - ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True), - ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), - # call/api - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("LdrLoadDll"), True), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("DoesNotExist"), False), - # call/string argument - ( - "93b2d1", - "process=(3564:4852),thread=6592, call=1", - capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), - True, - ), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.common.String("non_existant"), False), - # call/number argument - ("93b2d1", "process=(3564:4852),thread=6592, call=1", 
capa.features.insn.Number(0x801), True), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x010101010101), False), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), -) - -DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted( - [ - ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), - # file/imports - ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1), - # thread/api calls - ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), 9), - ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), - # call/api - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("LdrLoadDll"), 1), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.API("DoesNotExist"), 0), - # call/string argument - ( - "93b2d1", - "process=(3564:4852),thread=6592, call=1", - capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), - 1, - ), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.common.String("non_existant"), 0), - # call/number argument - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x801), 1), - ("93b2d1", "process=(3564:4852),thread=6592, call=1", capa.features.insn.Number(0x010101010101), 0), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), -) - FEATURE_PRESENCE_TESTS = sorted( [ # file/characteristic("embedded pe") diff --git a/tests/test_cape_features.py b/tests/test_cape_features.py index 769d88a39..36c09cb70 100644 --- a/tests/test_cape_features.py +++ b/tests/test_cape_features.py @@ -8,10 +8,96 @@ import fixtures +import capa.main +import capa.features.file +import capa.features.insn +import capa.features.common +import capa.features.basicblock + +DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted( + [ + # file/string + ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True), + ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True), + ("0000a657", "file", capa.features.common.String("nope"), False), + # file/sections + ("0000a657", "file", capa.features.file.Section(".rdata"), True), + ("0000a657", "file", capa.features.file.Section(".nope"), False), + # file/imports + ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True), + ("0000a657", "file", capa.features.file.Import("Nope"), False), + # file/exports + ("0000a657", "file", capa.features.file.Export("Nope"), False), + # process/environment variables + ( + "0000a657", + "process=(1180:3052)", + capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), + True, + ), + ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False), + # thread/api calls + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False), + # thread/number call argument + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False), + # thread/string call argument + ("0000a657", "process=(2852:3052),thread=2804", 
capa.features.common.String("SetThreadUILanguage"), True), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False), + ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), True), + ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), False), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + +DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( + # TODO(yelhamer): use the same sample for testing CAPE and Drakvuf extractors + # https://github.com/mandiant/capa/issues/2180 + [ + # file/string + ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1), + ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1), + ("0000a657", "file", capa.features.common.String("nope"), 0), + # file/sections + ("0000a657", "file", capa.features.file.Section(".rdata"), 1), + ("0000a657", "file", capa.features.file.Section(".nope"), 0), + # file/imports + ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1), + ("0000a657", "file", capa.features.file.Import("Nope"), 0), + # file/exports + ("0000a657", "file", capa.features.file.Export("Nope"), 0), + # process/environment variables + ( + "0000a657", + "process=(1180:3052)", + capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), + 2, + ), + ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0), + # thread/api calls + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0), + # thread/number call argument + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0), + # thread/string call 
argument + ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1), + ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0), + ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), 1), + ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), 0), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS, + DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"], ) def test_cape_features(sample, scope, feature, expected): @@ -20,7 +106,7 @@ def test_cape_features(sample, scope, feature, expected): @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_CAPE_FEATURE_COUNT_TESTS, + DYNAMIC_CAPE_FEATURE_COUNT_TESTS, indirect=["sample", "scope"], ) def test_cape_feature_counts(sample, scope, feature, expected): diff --git a/tests/test_drakvuf_features.py b/tests/test_drakvuf_features.py index 57dc37b31..79832fc34 100644 --- a/tests/test_drakvuf_features.py +++ b/tests/test_drakvuf_features.py @@ -8,10 +8,71 @@ import fixtures +import capa.main +import capa.features.file +import capa.features.insn +import capa.features.common + +DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted( + [ + ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), + # file/imports + ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True), + # thread/api calls + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True), + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), + # call/api + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), True), + ("93b2d1", 
"process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), False), + # call/string argument + ( + "93b2d1", + "process=(3564:4852),thread=6592,call=1", + capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), + True, + ), + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.common.String("non_existant"), False), + # call/number argument + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), True), + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), False), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + +DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted( + [ + ("93b2d1", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), + # file/imports + ("93b2d1", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1), + # thread/api calls + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), 9), + ("93b2d1", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), + # call/api + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), 1), + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), 0), + # call/string argument + ( + "93b2d1", + "process=(3564:4852),thread=6592,call=1", + capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), + 1, + ), + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.common.String("non_existant"), 0), + # call/number argument + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), 1), + ("93b2d1", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), 0), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. 
+ key=lambda t: (t[0], t[1]), +) + @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS, + DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"], ) def test_drakvuf_features(sample, scope, feature, expected): @@ -20,7 +81,7 @@ def test_drakvuf_features(sample, scope, feature, expected): @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS, + DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS, indirect=["sample", "scope"], ) def test_drakvuf_feature_counts(sample, scope, feature, expected): From 454cd2d7e6fe04ec93c35b838b605fc6cdf3f28e Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:39:48 +0100 Subject: [PATCH 41/59] Update capa/features/extractors/drakvuf/extractor.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 414527880..0b81b67b8 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -82,7 +82,7 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> call.name, ", ".join(f"{arg_name}={arg_value}" for arg_name, arg_value in call.arguments.items()), ( - f" -> {call.return_value}" if hasattr(call, "return_value") else "" + f" -> {getattr(call, 'return_value', '')}" ), # SysCalls don't have a return value, while WinApi calls do ) return call_name From f9d5c4a20d7950e514b11ad2865553cf0d819e14 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:40:19 +0100 Subject: [PATCH 42/59] Update capa/features/extractors/drakvuf/models.py Co-authored-by: Willi Ballenthin --- capa/features/extractors/drakvuf/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 59affa6ef..b13d554aa 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -64,7 +64,7 @@ class Call(ConciseModel): class WinApiCall(Call): - # This class models Windows api calls captured by Drakvuf (DLLs, etc.). + # This class models Windows API calls captured by DRAKVUF (DLLs, etc.). arguments: Dict[str, str] = Field(alias="Arguments") event: str = Field(alias="Event") return_value: str = Field(alias="ReturnValue") From 6617fc0f47b77927c914cf22789756eec1988229 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 16 Jul 2024 14:55:07 +0100 Subject: [PATCH 43/59] styling --- capa/features/address.py | 3 +-- capa/features/extractors/drakvuf/models.py | 12 ++++++------ capa/ida/plugin/form.py | 6 +++--- capa/render/result_document.py | 9 ++++----- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/capa/features/address.py b/capa/features/address.py index f0cbd77d2..45c3a600f 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -10,8 +10,7 @@ class Address(abc.ABC): @abc.abstractmethod - def __eq__(self, other): - ... + def __eq__(self, other): ... 
@abc.abstractmethod def __lt__(self, other): diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index b13d554aa..9e3ec9b1b 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -125,21 +125,21 @@ def validate_arguments(self) -> "DrakvufReport": @classmethod def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": - values: Dict[str, List] = {"syscalls": [], "apicalls": [], "discovered_dlls": [], "loaded_dlls": []} + report = cls() for entry in entries: plugin = entry.get("Plugin") # TODO(yelhamer): add support for more drakvuf plugins # https://github.com/mandiant/capa/issues/2181 if plugin == "syscall": - values["syscalls"].append(SystemCall(**entry)) + report.syscalls.append(SystemCall(**entry)) elif plugin == "apimon": event = entry.get("Event") if event == "api_called": - values["apicalls"].append(WinApiCall(**entry)) + report.apicalls.append(WinApiCall(**entry)) elif event == "dll_loaded": - values["loaded_dlls"].append(LoadedDLL(**entry)) + report.loaded_dlls.append(LoadedDLL(**entry)) elif event == "dll_discovered": - values["discovered_dlls"].append(DiscoveredDLL(**entry)) + report.discovered_dlls.append(DiscoveredDLL(**entry)) - return DrakvufReport(**values) + return report diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 4cf612051..0aee6cea2 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -932,9 +932,9 @@ def get_ask_use_persistent_cache(self, analyze): update_wait_box("verifying cached results") try: - results: Optional[ - capa.render.result_document.ResultDocument - ] = capa.ida.helpers.load_and_verify_cached_results() + results: Optional[capa.render.result_document.ResultDocument] = ( + capa.ida.helpers.load_and_verify_cached_results() + ) except Exception as e: capa.ida.helpers.inform_user_ida_ui("Failed to verify cached results, reanalyzing program") logger.exception("Failed to verify cached 
results (error: %s)", e) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 255d70ff9..975e37431 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -160,8 +160,7 @@ class CompoundStatementType: OPTIONAL = "optional" -class StatementModel(FrozenModel): - ... +class StatementModel(FrozenModel): ... class CompoundStatement(StatementModel): @@ -650,9 +649,9 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[ - str, List[Tuple[capa.features.address.Address, capa.features.common.Result]] - ] = collections.defaultdict(list) + capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( + collections.defaultdict(list) + ) # this doesn't quite work because we don't have the rule source for rules that aren't matched. rules_by_name = { From 8e7bc75bfb2946d0747f73e681f47d7752204a88 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 16 Jul 2024 15:00:32 +0100 Subject: [PATCH 44/59] drakvuf/extractor.py: black linting --- capa/features/extractors/drakvuf/extractor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 0b81b67b8..1a4f5062e 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -81,9 +81,7 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> call_name = "{}({}){}".format( call.name, ", ".join(f"{arg_name}={arg_value}" for arg_name, arg_value in call.arguments.items()), - ( - f" -> {getattr(call, 'return_value', '')}" - ), # SysCalls don't have a return value, while WinApi calls do + (f" -> {getattr(call, 'return_value', '')}"), # SysCalls don't have a return value, while WinApi calls do ) return call_name 
From 93240f5f6e714de85f31da15ae123c801708c20a Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Wed, 17 Jul 2024 10:42:28 +0100 Subject: [PATCH 45/59] drakvuf/models.py: remove need to empty report checking --- capa/features/extractors/drakvuf/models.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 9e3ec9b1b..be76301fa 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -10,8 +10,6 @@ from pydantic import Field, BaseModel, ConfigDict, model_validator -from capa.exceptions import EmptyReportError - logger = logging.getLogger(__name__) @@ -117,12 +115,6 @@ class DrakvufReport(ConciseModel): discovered_dlls: List[DiscoveredDLL] = [] loaded_dlls: List[LoadedDLL] = [] - @model_validator(mode="after") - def validate_arguments(self) -> "DrakvufReport": - if not any((self.syscalls, self.apicalls, self.discovered_dlls, self.loaded_dlls)): - raise EmptyReportError("Report is empty") - return self - @classmethod def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": report = cls() From c08c5bfa2fb1950c517121edfb371826faf2421d Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Wed, 17 Jul 2024 11:07:22 +0100 Subject: [PATCH 46/59] tests: add drakvuf models test --- tests/test_drakvuf_models.py | 48 ++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/test_drakvuf_models.py diff --git a/tests/test_drakvuf_models.py b/tests/test_drakvuf_models.py new file mode 100644 index 000000000..baf02ce7f --- /dev/null +++ b/tests/test_drakvuf_models.py @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import json + +from capa.features.extractors.drakvuf.models import SystemCall + + +def test_syscall_argument_construction(): + call_dictionary = json.loads( + r""" + { + "Plugin": "syscall", + "TimeStamp": "1716999134.581449", + "PID": 3888, + "PPID": 2852, + "TID": 368, + "UserName": "SessionID", + "UserId": 2, + "ProcessName": "\\Device\\HarddiskVolume2\\Windows\\explorer.exe", + "Method": "NtRemoveIoCompletionEx", + "EventUID": "0x1f", + "Module": "nt", + "vCPU": 0, + "CR3": "0x119b1002", + "Syscall": 369, + "NArgs": 6, + "IoCompletionHandle": "0xffffffff80001ac0", + "IoCompletionInformation": "0xfffff506a0284898", + "Count": "0x1", + "NumEntriesRemoved": "0xfffff506a02846bc", + "Timeout": "0xfffff506a02846d8", + "Alertable": "0x0" + } + """ + ) + call = SystemCall(**call_dictionary) + assert len(call.arguments) == call.nargs + assert call.arguments["IoCompletionHandle"] == "0xffffffff80001ac0" + assert call.arguments["IoCompletionInformation"] == "0xfffff506a0284898" + assert call.arguments["Count"] == "0x1" + assert call.arguments["NumEntriesRemoved"] == "0xfffff506a02846bc" + assert call.arguments["Timeout"] == "0xfffff506a02846d8" + assert call.arguments["Alertable"] == "0x0" From 6e0a9ebe1cb13128a1d159a5c8f742b5dcacfdce Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Fri, 19 Jul 2024 22:20:39 +0100 Subject: [PATCH 47/59] Update capa/features/extractors/drakvuf/global_.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- capa/features/extractors/drakvuf/global_.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index bd341f5b9..b9b0d93ab 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -22,7 +22,7 @@ def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - # drakvuf sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield OS(OS_WINDOWS), NO_ADDRESS From 2bb7f3c4cca0cee1e4a794a0f189414c15e42be9 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Fri, 19 Jul 2024 22:20:51 +0100 Subject: [PATCH 48/59] Update tests/test_cape_features.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- tests/test_cape_features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cape_features.py b/tests/test_cape_features.py index 36c09cb70..d72caa9ab 100644 --- a/tests/test_cape_features.py +++ b/tests/test_cape_features.py @@ -54,7 +54,7 @@ ) DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( - # TODO(yelhamer): use the same sample for testing CAPE and Drakvuf extractors + # TODO(yelhamer): use the same sample for testing CAPE and DRAKVUF extractors # https://github.com/mandiant/capa/issues/2180 [ # file/string From c0e9150b96fc93d9ebcb9f884679b85cc4ad34bc Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Fri, 19 Jul 2024 22:26:08 +0100 Subject: [PATCH 49/59] Update capa/features/extractors/drakvuf/models.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- capa/features/extractors/drakvuf/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index be76301fa..4ea43979f 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -101,7 +101,7 @@ class SystemCall(Call): @model_validator(mode="before") @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: - # Drakvuf stores argument names and values as entries in the syscall's entry. + # DRAKVUF stores argument names and values as entries in the syscall's entry. # This model validator collects those arguments into a list in the model. values["arguments"] = { name: value for name, value in values.items() if name not in REQUIRED_SYSCALL_FIELD_NAMES From 897e98bee91e1fb6ee07e91dd67f0e91a5a2380d Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Fri, 19 Jul 2024 22:45:17 +0100 Subject: [PATCH 50/59] Apply suggestions from code review: rename Drakvuf to DRAKVUF Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- capa/features/extractors/drakvuf/file.py | 2 +- capa/features/extractors/drakvuf/global_.py | 4 ++-- capa/features/extractors/drakvuf/helpers.py | 2 +- capa/features/extractors/drakvuf/models.py | 4 ++-- capa/helpers.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index 51a4455a3..d93c354b2 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -50,7 +50,7 @@ def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]] FILE_HANDLERS = ( - # TODO(yelhamer): extract more file features from other drakvuf plugins + # TODO(yelhamer): extract more file features from other DRAKVUF plugins # https://github.com/mandiant/capa/issues/2169 extract_import_names, ) diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index 
b9b0d93ab..00d18afc7 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -17,7 +17,7 @@ def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - # drakvuf sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Format(FORMAT_PE), NO_ADDRESS @@ -27,7 +27,7 @@ def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: - # drakvuf sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html + # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 26493b70e..59708f5df 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -19,7 +19,7 @@ def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddres result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} for call in itertools.chain(report.syscalls, report.apicalls): if call.pid == 0: - # Drakvuf captures api/native calls from all processes running on the system. + # DRAKVUF captures api/native calls from all processes running on the system. 
# we ignore the pid 0 since it's a system process and it's unlikely for it to # be hijacked or so on, in addition to capa addresses not supporting null pids continue diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index 4ea43979f..fbfd649c7 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -92,7 +92,7 @@ class SystemCall(Call): # "IoCompletionHandle": "0xffffffff80001ac0", "IoCompletionReserveHandle": "0xffffffff8000188c", # "KeyContext": "0x0", "ApcContext": "0x2", "IoStatus": "0x7ffb00000000", "IoStatusInformation": "0x0" # } - # The keys up until "NArgs" are common to all the native calls that Drakvuf reports, with + # The keys up until "NArgs" are common to all the native calls that DRAKVUF reports, with # the remaining keys representing the call's specific arguments. syscall_number: int = Field(alias="Syscall") module: str = Field(alias="Module") @@ -121,7 +121,7 @@ def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": for entry in entries: plugin = entry.get("Plugin") - # TODO(yelhamer): add support for more drakvuf plugins + # TODO(yelhamer): add support for more DRAKVUF plugins # https://github.com/mandiant/capa/issues/2181 if plugin == "syscall": report.syscalls.append(SystemCall(**entry)) diff --git a/capa/helpers.py b/capa/helpers.py index 57c09bcc6..8cb12e62b 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -33,7 +33,7 @@ EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") # CAPE extensions: .json, .json_, .json.gz -# Drakvuf Sandbox extensions: .log, .log.gz +# DRAKVUF Sandbox extensions: .log, .log.gz EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz") EXTENSIONS_ELF = "elf_" EXTENSIONS_FREEZE = "frz" @@ -101,7 +101,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): obj = msgspec.json.decode(line_s) yield obj except (msgspec.DecodeError, UnicodeDecodeError): 
- # sometimes Drakvuf reports bad method names and/or malformed JSON + # sometimes DRAKVUF reports bad method names and/or malformed JSON logger.debug("bad DRAKVUF log line: %s", line) From e786552480f4c486968c461fd8c55ec3e0b3d078 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 23 Jul 2024 12:20:43 +0100 Subject: [PATCH 51/59] drakvuf/call.py: use int(..., 0) instead of str_to_number() --- capa/features/extractors/drakvuf/call.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index b46947698..5c141b673 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -9,7 +9,6 @@ import logging from typing import Tuple, Iterator -from capa.helpers import str_to_number from capa.features.insn import API, Number from capa.features.common import String, Feature from capa.features.address import Address @@ -37,7 +36,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - # list similar to disassembly: arguments right-to-left, call for arg_value in reversed(call.arguments.values()): try: - yield Number(str_to_number(arg_value)), ch.address + yield Number(int(arg_value, 0)), ch.address except ValueError: # yield argument as a string yield String(arg_value), ch.address From 4cab9754bc1949ce628b53ba8e8e0040eb24d059 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 23 Jul 2024 12:27:21 +0100 Subject: [PATCH 52/59] remove str_to_number --- capa/helpers.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index 8cb12e62b..92ed967ec 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -49,15 +49,6 @@ def hex(n: int) -> str: return f"0x{(n):X}" -def str_to_number(s: str) -> int: - if s.isdecimal(): - return int(s) - try: - return int(s, 16) - except ValueError: - raise ValueError(f"{s} is not a valid number.") - - def get_file_taste(sample_path: 
Path) -> bytes: if not sample_path.exists(): raise IOError(f"sample path {sample_path} does not exist or cannot be accessed") From 2576aa1133180c0a977588cd73aedd0032a6cc03 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 23 Jul 2024 14:51:14 +0100 Subject: [PATCH 53/59] drakvuf/call.py: yield argument memory address value as well --- capa/features/extractors/drakvuf/call.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 5c141b673..2fa89201c 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -38,7 +38,12 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - try: yield Number(int(arg_value, 0)), ch.address except ValueError: - # yield argument as a string + if ":" in arg_value and arg_value.startswith("0x"): + # if the argument is in the format: memory_address:str (e.g. '0xc6f217efe0:'"ntdll.dll"') + # then return the contents of that memory address on its own as well. 
+ addr, val = arg_value.split(":", maxsplit=1) + yield String(val) + # yield the entire string regardless in case of unexpected argument value formats yield String(arg_value), ch.address yield API(call.name), ch.address From b5047a23d2f727595880f7eb595939a42de581bf Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:12:17 +0100 Subject: [PATCH 54/59] Update call.py: remove verbosity in yield statement --- capa/features/extractors/drakvuf/call.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 2fa89201c..76cfb46ad 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -41,8 +41,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - if ":" in arg_value and arg_value.startswith("0x"): # if the argument is in the format: memory_address:str (e.g. '0xc6f217efe0:'"ntdll.dll"') # then return the contents of that memory address on its own as well. 
- addr, val = arg_value.split(":", maxsplit=1) - yield String(val) + yield String(arg_value.split(":", maxsplit=1)[1]) # yield the entire string regardless in case of unexpected argument value formats yield String(arg_value), ch.address From e26072eca5c329aaf8996d80ca4c0ecceab35527 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:24:06 +0100 Subject: [PATCH 55/59] Update call.py: yield missing address as well --- capa/features/extractors/drakvuf/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 76cfb46ad..f95ae19e7 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -41,7 +41,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - if ":" in arg_value and arg_value.startswith("0x"): # if the argument is in the format: memory_address:str (e.g. '0xc6f217efe0:'"ntdll.dll"') # then return the contents of that memory address on its own as well. 
- yield String(arg_value.split(":", maxsplit=1)[1]) + yield String(arg_value.split(":", maxsplit=1)[1]), ch.address # yield the entire string regardless in case of unexpected argument value formats yield String(arg_value), ch.address From d9e3ca1b5ebaef6eff7bcbc3c5be40b115de5dd0 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Wed, 24 Jul 2024 12:31:47 +0100 Subject: [PATCH 56/59] drakvuf/call.py: yield entire argument string only --- capa/features/extractors/drakvuf/call.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index f95ae19e7..34e877acc 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -38,11 +38,10 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - try: yield Number(int(arg_value, 0)), ch.address except ValueError: - if ":" in arg_value and arg_value.startswith("0x"): - # if the argument is in the format: memory_address:str (e.g. '0xc6f217efe0:'"ntdll.dll"') - # then return the contents of that memory address on its own as well. - yield String(arg_value.split(":", maxsplit=1)[1]), ch.address - # yield the entire string regardless in case of unexpected argument value formats + # DRAKVUF automatically resolves the contents of memory addresses, (e.g. Arg1="0xc6f217efe0:\"ntdll.dll\""). 
+ # For those cases we yield the entire string as is, since yielding the address only would + # likely not provide any matches, and yielding just the memory contents would probably be misleading, + # but yielding the entire string would be helpful for an analyst looking at the verbose output yield String(arg_value), ch.address yield API(call.name), ch.address From 3e3be41487fe2864cd4582a201777dad94270214 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Wed, 24 Jul 2024 12:37:38 +0100 Subject: [PATCH 57/59] update readme.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a50c90a25..68f1fd7e7 100644 --- a/README.md +++ b/README.md @@ -126,8 +126,10 @@ function @ 0x4011C0 ... ``` -Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction. -In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON). +Additionally, capa also supports analyzing sandbox reports for dynamic capability extraction. +In order to use this, you first submit your sample to one of supported sandboxes for analysis, and then run capa against the generated report file. + +Currently, capa supports the [CAPE sandbox](https://github.com/kevoreilly/CAPEv2) and the [DRAKVUF sandbox](https://github.com/CERT-Polska/drakvuf-sandbox/). In order to use either, simlpy run capa against the generated file (JSON for CAPE or LOG for DRAKVUF sandbox) and it will automatically detect the sandbox and extract capabilities from it. 
Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary: From 729679d5d56c97de49acee8f1f36c373acbc635a Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:39:12 +0100 Subject: [PATCH 58/59] Update README.md: typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68f1fd7e7..882b5cb3d 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ function @ 0x4011C0 Additionally, capa also supports analyzing sandbox reports for dynamic capability extraction. In order to use this, you first submit your sample to one of supported sandboxes for analysis, and then run capa against the generated report file. -Currently, capa supports the [CAPE sandbox](https://github.com/kevoreilly/CAPEv2) and the [DRAKVUF sandbox](https://github.com/CERT-Polska/drakvuf-sandbox/). In order to use either, simlpy run capa against the generated file (JSON for CAPE or LOG for DRAKVUF sandbox) and it will automatically detect the sandbox and extract capabilities from it. +Currently, capa supports the [CAPE sandbox](https://github.com/kevoreilly/CAPEv2) and the [DRAKVUF sandbox](https://github.com/CERT-Polska/drakvuf-sandbox/). In order to use either, simply run capa against the generated file (JSON for CAPE or LOG for DRAKVUF sandbox) and it will automatically detect the sandbox and extract capabilities from it. 
Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary: From 3fb0eaf7a25229a9ef0310a4236c373e7430fbc2 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 24 Jul 2024 14:21:11 +0200 Subject: [PATCH 59/59] Update CHANGELOG.md Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eced14fac..49c1fe1cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## master (unreleased) ### New Features -- support analyzing DRAKVUF sandbox traces #2143 @yelhamer +- support analyzing DRAKVUF traces #2143 @yelhamer ### Breaking Changes