Skip to content

Commit

Permalink
Filter unrelated file types
Browse files Browse the repository at this point in the history
  • Loading branch information
ErebusZ committed Jul 22, 2024
1 parent f0282e4 commit 7899380
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 1 deletion.
12 changes: 11 additions & 1 deletion agent/trufflehog_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from agent import input_type_handler
from agent import utils

# File types (as returned by utils.get_file_type) that are skipped instead of
# being scanned when a v3.asset.file message is processed.
BLACKLISTED_FILE_TYPES = ["image", "apple_image", "font", "css", "apk", "xapk", "ipa"]

logging.basicConfig(
format="%(message)s",
datefmt="[%X]",
Expand Down Expand Up @@ -101,7 +103,15 @@ def process(self, message: m.Message) -> None:
return
cmd_output = self.run_scanner(link_type, link)
elif message.selector.startswith("v3.asset.file"):
cmd_output = _process_file(message.data.get("content", b""))
path = message.data.get("path", "")
content = message.data.get("content", b"")
file_type = utils.get_file_type(filename=path, file_content=content)
if file_type in BLACKLISTED_FILE_TYPES:
logger.info(
"Skipping file %s with blacklisted type %s", path, file_type
)
return
cmd_output = _process_file(content)
elif message.selector.startswith("v3.capture.logs"):
content = message.data.get("message", "")
cmd_output = _process_file(content.encode("utf-8"))
Expand Down
55 changes: 55 additions & 0 deletions agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import json
from typing import Any

import magic


def load_newline_json(byte_data: bytes) -> list[dict[str, Any]]:
"""Convertes bytes to a list of dictionaries.
Expand Down Expand Up @@ -47,3 +49,56 @@ def escape_backtick(text: str) -> str:
The modified text with backticks escaped.
"""
return text.replace("`", r"\`")


def get_file_type(filename: str, file_content: bytes) -> str:
    """Method responsible for getting the file type.

    The type is derived from the file name combined with the description and
    MIME type that `magic` reports for the content. Rules are evaluated in
    order and the first matching rule wins.

    Args:
        filename: Name (or path) of the file.
        file_content: Content of the file.

    Returns:
        File type as a string, `"unknown"` when no rule matches.
    """
    magic_type = magic.from_buffer(file_content)
    magic_mime_type = magic.from_buffer(file_content, mime=True)
    # Callers may pass a full path; compare exact file names against the last
    # path component only.
    base_name = filename.rsplit("/", 1)[-1]

    if magic_type == "Android binary XML":
        if filename.endswith("AndroidManifest.xml"):
            return "android_manifest"
        return "android_binary_xml"
    if filename.endswith((".js", ".jsbundle")):
        return "js"
    if filename.endswith(".html"):
        return "html"
    if filename.endswith(".dll"):
        return "dll"
    if base_name == "resources.arsc" and magic_mime_type == "application/octet-stream":
        return "android_resource"
    if (
        magic_type
        == "PE32 executable (DLL) (console) Intel 80386 Mono/.Net assembly, for MS Windows"
    ):
        return "dotnet_dll"
    if filename.endswith(".plist") and magic_type == "Apple binary property list":
        return "binary_plist"
    if filename.endswith(".plist") and magic_type.startswith("XML"):
        return "xml_plist"
    if filename.endswith(".xml"):
        return "xml"
    if magic_mime_type.startswith("image/"):
        return "image"
    if filename.endswith(".json"):
        return "json"
    if magic_mime_type.startswith("font/") or "Font Format" in magic_type:
        return "font"
    if filename.endswith(".css"):
        return "css"
    if filename.endswith(".apk"):
        return "apk"
    if filename.endswith(".ipa"):
        return "ipa"
    if filename.endswith(".xapk"):
        return "xapk"
    return "unknown"
1 change: 1 addition & 0 deletions requirement.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
ostorlab[agent]
python-magic
rich
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,12 @@ def trufflehog_agent_file(
)
agent_object = trufflehog_agent.TruffleHogAgent(definition, settings)
return agent_object


@pytest.fixture
def apk_message_file() -> message.Message:
    """Build a v3.asset.file message carrying the content of the fake apk test file."""
    with open("./tests/files/fake.apk", "rb") as apk_file:
        apk_bytes = apk_file.read()
    payload = {"content": apk_bytes, "path": "tests/files/fake.apk"}
    return message.Message.from_data("v3.asset.file", data=payload)
Empty file added tests/files/fake.apk
Empty file.
14 changes: 14 additions & 0 deletions tests/trufflehog_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,17 @@ def testTrufflehog_whenProcessingVerifiedAndUnverifiedSecrets_shouldReportOnlyVe
"Secret `https://admin:[email protected]` found in file `magic_is_real.js`."
== agent_mock[0].data.get("technical_detail")
)


def testSubprocessParameter_whenProcessingFile_beValid(
    apk_message_file: message.Message,
    trufflehog_agent_file: trufflehog_agent.TruffleHogAgent,
    mocker: plugin.MockerFixture,
    agent_mock: list[message.Message],
) -> None:
    """Processing an apk file message emits nothing and never calls the scanner."""
    check_output_mock = mocker.patch("subprocess.check_output", return_value=b"")

    trufflehog_agent_file.process(apk_message_file)

    # No message was emitted by the agent.
    assert agent_mock == []
    # subprocess.check_output was never invoked.
    assert check_output_mock.call_count == 0
39 changes: 39 additions & 0 deletions tests/utils_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Unittest for helper funstions"""

import pytest

from agent import utils


Expand Down Expand Up @@ -100,3 +102,40 @@ def testPruneReports_always_dedupCorrectly() -> None:
def testEscapeBacktick_always_returnExpectedText() -> None:
token_with_backtick = "SomeSecret`super`"
assert utils.escape_backtick(token_with_backtick) == "SomeSecret\\`super\\`"


@pytest.mark.parametrize(
    "path,content,expected_type",
    [
        (
            "some/path.jpg",
            b"\xff\xd8\xff\xe0\x00\x10\x4a\x46\x49\x46\x00\x01\x01\x01\x00\x60",
            "image",
        ),
        (
            "some/path.plist",
            b"bplist00\xd1\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
            "binary_plist",
        ),
        (
            "some/otherpath.plist",
            b'<?xml version="1.0" encoding="UTF-8"?>\n',
            "xml_plist",
        ),
        ("some/path.js", b"", "js"),
        ("some/path.html", b"", "html"),
        ("some/path.dll", b"", "dll"),
        ("some/path.xml", b"", "xml"),
        ("some/path.json", b"", "json"),
        ("some/path.css", b"", "css"),
        ("some/path.apk", b"", "apk"),
        ("some/path.ipa", b"", "ipa"),
        ("some/path.xapk", b"", "xapk"),
        ("some/path.font", b"\x77\x4f\x46\x32", "font"),
        ("some/path.stuff", b"", "unknown"),
    ],
)
def testGetFileType_always_detectTheCorrectType(
    path: str, content: bytes, expected_type: str
) -> None:
    """Check that get_file_type returns the expected type for each path/content pair."""
    # The parameter is named expected_type so it does not shadow the builtin `type`.
    assert utils.get_file_type(path, content) == expected_type

0 comments on commit 7899380

Please sign in to comment.