Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

record and show the analysis flavor #1713

Merged
merged 6 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions capa/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def collect_metadata(rules: List[Path]):
sha256=sha256,
path=idaapi.get_input_file_path(),
),
flavor=rdoc.Flavor.STATIC,
analysis=rdoc.StaticAnalysis(
format=idaapi.get_file_type_name(),
arch=arch,
Expand Down
9 changes: 9 additions & 0 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import colorama
import tqdm.contrib.logging
from pefile import PEFormatError
from typing_extensions import assert_never
from elftools.common.exceptions import ELFError

import capa.perf
Expand Down Expand Up @@ -1022,6 +1023,13 @@ def collect_metadata(
arch = get_arch(sample_path)
os_ = get_os(sample_path) if os_ == OS_AUTO else os_

if isinstance(extractor, StaticFeatureExtractor):
flavor = rdoc.Flavor.STATIC
elif isinstance(extractor, DynamicFeatureExtractor):
flavor = rdoc.Flavor.DYNAMIC
else:
assert_never(extractor)

return rdoc.Metadata(
timestamp=datetime.datetime.now(),
version=capa.version.__version__,
Expand All @@ -1032,6 +1040,7 @@ def collect_metadata(
sha256=sha256,
path=str(Path(sample_path).resolve()),
),
flavor=flavor,
analysis=get_sample_analysis(
format_,
arch,
Expand Down
1 change: 1 addition & 0 deletions capa/render/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
(width("md5", 22), width(doc.meta.sample.md5, 82)),
("sha1", doc.meta.sample.sha1),
("sha256", doc.meta.sample.sha256),
("analysis", doc.meta.flavor),
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
Expand Down
20 changes: 20 additions & 0 deletions capa/render/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,23 @@ def scope_to_pb2(scope: capa.rules.Scope) -> capa_pb2.Scope.ValueType:
assert_never(scope)


def flavor_to_pb2(flavor: rd.Flavor) -> capa_pb2.Flavor.ValueType:
    """Translate a result-document flavor into the matching protobuf enum value.

    Only the STATIC and DYNAMIC members are mapped; any other input is handed
    to `assert_never`.
    """
    if flavor == rd.Flavor.STATIC:
        return capa_pb2.Flavor.FLAVOR_STATIC

    if flavor == rd.Flavor.DYNAMIC:
        return capa_pb2.Flavor.FLAVOR_DYNAMIC

    # every known member was handled above
    assert_never(flavor)


def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
assert isinstance(meta.analysis, rd.StaticAnalysis)
return capa_pb2.Metadata(
timestamp=str(meta.timestamp),
version=meta.version,
argv=meta.argv,
sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
flavor=flavor_to_pb2(meta.flavor),
analysis=capa_pb2.Analysis(
format=meta.analysis.format,
arch=meta.analysis.arch,
Expand Down Expand Up @@ -480,6 +490,15 @@ def scope_from_pb2(scope: capa_pb2.Scope.ValueType) -> capa.rules.Scope:
assert_never(scope)


def flavor_from_pb2(flavor: capa_pb2.Flavor.ValueType) -> rd.Flavor:
    """Translate a protobuf flavor enum value into the result-document flavor.

    Only FLAVOR_STATIC and FLAVOR_DYNAMIC are mapped. Any other value,
    including FLAVOR_UNSPECIFIED, falls through to `assert_never`.
    """
    if flavor == capa_pb2.Flavor.FLAVOR_STATIC:
        return rd.Flavor.STATIC

    if flavor == capa_pb2.Flavor.FLAVOR_DYNAMIC:
        return rd.Flavor.DYNAMIC

    # no result-document counterpart exists for the remaining values
    assert_never(flavor)


def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
return rd.Metadata(
timestamp=datetime.datetime.fromisoformat(meta.timestamp),
Expand All @@ -491,6 +510,7 @@ def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
sha256=meta.sample.sha256,
path=meta.sample.path,
),
flavor=flavor_from_pb2(meta.flavor),
analysis=rd.StaticAnalysis(
format=meta.analysis.format,
arch=meta.analysis.arch,
Expand Down
7 changes: 7 additions & 0 deletions capa/render/proto/capa.proto
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,19 @@ message MatchFeature {
optional string description = 3;
}

// Flavor of the analysis that produced a result document.
// NOTE(review): FLAVOR_UNSPECIFIED is presumably what readers see when the
// field was never set (proto3 default of 0) — confirm the syntax version.
enum Flavor {
FLAVOR_UNSPECIFIED = 0;
FLAVOR_STATIC = 1;
FLAVOR_DYNAMIC = 2;
}

// Top-level metadata describing one analysis run: when and how it was invoked,
// which sample was analyzed, and details of the analysis itself.
message Metadata {
string timestamp = 1; // iso8601 format, like: 2019-01-01T00:00:00Z
string version = 2;
repeated string argv = 3;
Sample sample = 4;
Analysis analysis = 5;
// flavor of the analysis; see enum Flavor for the possible values
Flavor flavor = 6;
}

message MnemonicFeature {
Expand Down
3,844 changes: 3,720 additions & 124 deletions capa/render/proto/capa_pb2.py
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the protobuf-compiler available on Debian (versus Nix or something else before), and it generates this Python-native representation, which I like better than the opaque binary descriptors from before.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It kind of explodes the line count, though.

Large diffs are not rendered by default.

22 changes: 21 additions & 1 deletion capa/render/proto/capa_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ ADDRESSTYPE_DN_TOKEN_OFFSET: AddressType.ValueType # 5
ADDRESSTYPE_NO_ADDRESS: AddressType.ValueType # 6
global___AddressType = AddressType

# Type stubs for the protobuf `Flavor` enum; the trailing `# N` comments give
# each member's numeric value.
# NOTE(review): this looks like generated stub output — presumably it should be
# regenerated from capa.proto rather than edited by hand; confirm.
class _Flavor:
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType

class _FlavorEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_Flavor.ValueType], builtins.type):
DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
FLAVOR_UNSPECIFIED: _Flavor.ValueType # 0
FLAVOR_STATIC: _Flavor.ValueType # 1
FLAVOR_DYNAMIC: _Flavor.ValueType # 2

class Flavor(_Flavor, metaclass=_FlavorEnumTypeWrapper): ...

# module-level aliases of the enum members
FLAVOR_UNSPECIFIED: Flavor.ValueType # 0
FLAVOR_STATIC: Flavor.ValueType # 1
FLAVOR_DYNAMIC: Flavor.ValueType # 2
global___Flavor = Flavor

class _Scope:
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
Expand Down Expand Up @@ -776,6 +793,7 @@ class Metadata(google.protobuf.message.Message):
ARGV_FIELD_NUMBER: builtins.int
SAMPLE_FIELD_NUMBER: builtins.int
ANALYSIS_FIELD_NUMBER: builtins.int
FLAVOR_FIELD_NUMBER: builtins.int
timestamp: builtins.str
"""iso8601 format, like: 2019-01-01T00:00:00Z"""
version: builtins.str
Expand All @@ -785,6 +803,7 @@ class Metadata(google.protobuf.message.Message):
def sample(self) -> global___Sample: ...
@property
def analysis(self) -> global___Analysis: ...
flavor: global___Flavor.ValueType
def __init__(
self,
*,
Expand All @@ -793,9 +812,10 @@ class Metadata(google.protobuf.message.Message):
argv: collections.abc.Iterable[builtins.str] | None = ...,
sample: global___Sample | None = ...,
analysis: global___Analysis | None = ...,
flavor: global___Flavor.ValueType = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "sample", b"sample"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "argv", b"argv", "sample", b"sample", "timestamp", b"timestamp", "version", b"version"]) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "argv", b"argv", "flavor", b"flavor", "sample", b"sample", "timestamp", b"timestamp", "version", b"version"]) -> None: ...

global___Metadata = Metadata

Expand Down
7 changes: 7 additions & 0 deletions capa/render/result_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import datetime
import collections
from enum import Enum
from typing import Dict, List, Tuple, Union, Literal, Optional

from pydantic import Field, BaseModel, ConfigDict
Expand Down Expand Up @@ -120,11 +121,17 @@ class DynamicAnalysis(Model):
Analysis: TypeAlias = Union[StaticAnalysis, DynamicAnalysis]


class Flavor(str, Enum):
    """Kind of analysis that produced a result document.

    Members are also ``str`` instances, so they compare equal to their plain
    string values (for example ``Flavor.STATIC == "static"``).
    """

    STATIC = "static"
    DYNAMIC = "dynamic"

    def __str__(self) -> str:
        """Return the bare value (e.g. ``"static"``) instead of ``"Flavor.STATIC"``."""
        # The renderers place the member directly into table rows, where the
        # plain value is expected. Without this override `str()` yields the
        # qualified member name, and newer Python versions make `format()` and
        # f-strings follow `str()` for mixed-in enums as well.
        return self.value

# Top-level metadata of a result document: when and how the analysis ran,
# which sample it ran against, and the details of the analysis itself.
class Metadata(Model):
timestamp: datetime.datetime
version: str
# command-line arguments, when available
argv: Optional[Tuple[str, ...]]
sample: Sample
# flavor of the analysis (static or dynamic)
flavor: Flavor
# either a StaticAnalysis or a DynamicAnalysis (see the Analysis alias)
analysis: Analysis


Expand Down
13 changes: 9 additions & 4 deletions capa/render/verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ def format_address(address: frz.Address) -> str:
return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}"
elif address.type == frz.AddressType.DYNAMIC:
assert isinstance(address.value, tuple)
id_, return_address = address.value
assert isinstance(id_, int)
assert isinstance(return_address, int)
return f"event: {id_}, retaddr: 0x{return_address:x}"
ppid, pid, tid, id_, return_address = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}, return address: {capa.helpers.hex(return_address)}"
elif address.type == frz.AddressType.PROCESS:
assert isinstance(address.value, tuple)
ppid, pid = address.value
Expand All @@ -71,6 +69,10 @@ def format_address(address: frz.Address) -> str:
tid = address.value
assert isinstance(tid, int)
return f"thread id: {tid}"
elif address.type == frz.AddressType.CALL:
assert isinstance(address.value, tuple)
ppid, pid, tid, id_ = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}"
elif address.type == frz.AddressType.NO_ADDRESS:
return "global"
else:
Expand All @@ -90,6 +92,7 @@ def render_static_meta(ostream, doc: rd.ResultDocument):
os windows
format pe
arch amd64
analysis static
extractor VivisectFeatureExtractor
base address 0x10000000
rules (embedded rules)
Expand All @@ -108,6 +111,7 @@ def render_static_meta(ostream, doc: rd.ResultDocument):
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
("analysis", doc.meta.flavor),
("extractor", doc.meta.analysis.extractor),
("base address", format_address(doc.meta.analysis.base_address)),
("rules", "\n".join(doc.meta.analysis.rules)),
Expand Down Expand Up @@ -152,6 +156,7 @@ def render_dynamic_meta(ostream, doc: rd.ResultDocument):
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
("analysis", doc.meta.flavor),
("extractor", doc.meta.analysis.extractor),
("rules", "\n".join(doc.meta.analysis.rules)),
("process count", len(doc.meta.analysis.feature_counts.processes)),
Expand Down
9 changes: 9 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,15 @@ def get_data_path_by_name(name) -> Path:
/ "v2.2"
/ "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
elif name.startswith("d46900"):
return (
CD
/ "data"
/ "dynamic"
/ "cape"
/ "v2.2"
/ "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
else:
Expand Down
58 changes: 58 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import gzip
import json
import textwrap
from pathlib import Path

import pytest
import fixtures
Expand Down Expand Up @@ -582,3 +584,59 @@ def test_main_rd():
assert capa.main.main([path, "-j"]) == 0
assert capa.main.main([path, "-q"]) == 0
assert capa.main.main([path]) == 0


def extract_cape_report(tmp_path: Path, gz: Path) -> Path:
    """Decompress the gzip file `gz` into `tmp_path / "report.json"`.

    Returns:
        Path of the decompressed report.
    """
    destination = tmp_path / "report.json"
    compressed = gz.read_bytes()
    decompressed = gzip.decompress(compressed)
    destination.write_bytes(decompressed)
    return destination


# Smoke test: run capa's main entry point against a decompressed CAPE report
# using a single hand-written rule, once per output mode.
def test_main_cape1(tmp_path):
path = extract_cape_report(tmp_path, fixtures.get_data_path_by_name("0000a657"))

# TODO(williballenthin): use default rules set
# https://github.com/mandiant/capa/pull/1696
# write a one-rule directory into tmp_path so the test does not depend on the
# default rule set
rules = tmp_path / "rules"
rules.mkdir()
(rules / "create-or-open-registry-key.yml").write_text(
textwrap.dedent(
"""
rule:
meta:
name: create or open registry key
authors:
- testing
scopes:
static: instruction
dynamic: call
features:
- or:
- api: advapi32.RegOpenKey
- api: advapi32.RegOpenKeyEx
- api: advapi32.RegCreateKey
- api: advapi32.RegCreateKeyEx
- api: advapi32.RegOpenCurrentUser
- api: advapi32.RegOpenKeyTransacted
- api: advapi32.RegOpenUserClassesRoot
- api: advapi32.RegCreateKeyTransacted
- api: ZwOpenKey
- api: ZwOpenKeyEx
- api: ZwCreateKey
- api: ZwOpenKeyTransacted
- api: ZwOpenKeyTransactedEx
- api: ZwCreateKeyTransacted
- api: NtOpenKey
- api: NtCreateKey
- api: SHRegOpenUSKey
- api: SHRegCreateUSKey
- api: RtlCreateRegistryKey
"""
)
)

# each invocation must exit with status 0: default output, then -q, -j, -v, -vv
assert capa.main.main([str(path), "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-j", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-v", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-vv", "-r", str(rules)]) == 0