Skip to content

Commit

Permalink
add idalib backend
Browse files Browse the repository at this point in the history
  • Loading branch information
williballenthin committed Sep 23, 2024
1 parent a8e5261 commit ac94f87
Show file tree
Hide file tree
Showing 10 changed files with 235 additions and 100 deletions.
3 changes: 3 additions & 0 deletions .github/pyinstaller/pyinstaller.spec
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ a = Analysis(
"qt5",
"pyqtwebengine",
"pyasn1",
# don't pull in Binary Ninja/IDA bindings that should
# only be installed locally.
"binaryninja",
"ida",
],
)

Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### New Features

- add IDA v9.0 backend via idalib #2376 @williballenthin

### Breaking Changes

### New Rules (0)
Expand Down
5 changes: 3 additions & 2 deletions capa/features/extractors/ida/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import List, Tuple, Iterator

import idaapi
import ida_nalt

import capa.ida.helpers
import capa.features.extractors.elf
Expand All @@ -32,7 +31,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
super().__init__(
hashes=SampleHashes(
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
md5=capa.ida.helpers.retrieve_input_file_md5(),
sha1="(unknown)",
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
)
)
self.global_features: List[Tuple[Feature, Address]] = []
Expand Down
113 changes: 113 additions & 0 deletions capa/features/extractors/ida/idalib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import json
import logging
import importlib.util
from typing import Optional
from pathlib import Path

logger = logging.getLogger(__name__)


def is_idalib_installed() -> bool:
    """Report whether the `ida` package can be located by Python's import machinery.

    Returns:
        True when a module spec for `ida` is found, False otherwise.
    """
    try:
        spec = importlib.util.find_spec("ida")
    except ModuleNotFoundError:
        return False
    return spec is not None


def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's IDA config file, following IDA's per-platform user directories.

    Returns:
        The path to `ida-config.json` when that file exists, otherwise None.
        None is also returned on Windows when the APPDATA environment
        variable is not set, since there is then no directory to search.
    """
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
        appdata = os.getenv("APPDATA")
        if not appdata:
            # os.getenv() returns None for an unset variable and Path(None)
            # raises TypeError, so bail out instead of crashing.
            return None
        config_dir = Path(appdata) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path


def find_idalib() -> Optional[Path]:
    """Locate the idalib Python package directory inside the user's IDA installation.

    The IDA install directory is read from the `Paths` / `ida-install-dir`
    entry of the user's IDA config file. The installation is only accepted
    when it contains `ida.hlp`, the platform's idalib shared library, and
    `idalib/python/ida/__init__.py`.

    Returns:
        The `<ida-install-dir>/idalib/python` directory, or None when the
        config file is missing, unreadable or malformed, when the platform is
        not supported, or when any of the expected files is absent.
    """
    config_path = get_idalib_user_config_path()
    if not config_path:
        return None

    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        logger.debug("failed to read IDA user config %s: %s", config_path, e)
        return None

    try:
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except (KeyError, TypeError):
        # TypeError covers a config whose top level, "Paths" entry,
        # or install dir value is not of the expected type.
        return None

    if not ida_install_dir.exists():
        return None

    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }.get(sys.platform)
    if libname is None:
        # no known idalib library name for this platform
        return None

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "ida" / "__init__.py").is_file():
        return None

    return idalib_path


def has_idalib() -> bool:
    """Check whether idalib is available, either installed or discoverable on disk.

    Returns:
        True when the `ida` package is already importable, or when an IDA
        installation containing idalib is found via `find_idalib()`;
        False otherwise.
    """
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")
        # return here so the "found" message below is not logged for a failed search
        return False

    logger.debug("found IDA idalib API: %s", idalib_path)
    return True


def load_idalib() -> bool:
    """Make the `ida` package importable and import it.

    First tries a plain `import ida`. If that fails, the directory returned
    by `find_idalib()` is appended to `sys.path` and the import is retried.

    Returns:
        True when `ida` was imported, False when no idalib directory was
        found or the import still fails after extending `sys.path`.
    """
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        pass
    else:
        return True

    located = find_idalib()
    if not located:
        return False

    sys.path.append(located.absolute().as_posix())
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        return False
    return True
56 changes: 56 additions & 0 deletions capa/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import gzip
import ctypes
import inspect
import logging
import tempfile
import contextlib
import importlib.util
from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
Expand Down Expand Up @@ -81,6 +84,59 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011


@contextlib.contextmanager
def stdout_redirector(stream):
    """
    Redirect stdout at the C runtime level,
    which lets us handle native libraries that spam stdout.
    *But*, this only works on Linux! On other platforms nothing is captured:
    output still goes to stdout and a warning is logged before and after the body.
    So, try to upstream the fix when possible.
    Via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/

    Args:
        stream: object with a `write()` method accepting bytes; it receives
            everything written to stdout while the context was active.
            Nothing is written to it when the body raises.

    stdout is restored even when the body of the `with` statement raises.
    """
    if sys.platform not in ("linux", "linux2"):
        logger.warning("Unable to capture STDOUT on non-Linux (begin)")
        yield
        logger.warning("Unable to capture STDOUT on non-Linux (end)")
        return

    # libc is only on Linux
    LIBC = ctypes.CDLL(None)
    C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")

    # The original fd stdout points to. Usually 1 on POSIX systems.
    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        # Flush the C-level buffer stdout
        LIBC.fflush(C_STDOUT)
        # Flush and close sys.stdout - also closes the file descriptor (fd)
        sys.stdout.close()
        # Make original_stdout_fd point to the same file as to_fd
        os.dup2(to_fd, original_stdout_fd)
        # Create a new sys.stdout that points to the redirected fd
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))

    # Save a copy of the original stdout fd in saved_stdout_fd
    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        # Create a temporary file and redirect stdout to it.
        # The `with` closes it on every path, and only if it was actually created.
        with tempfile.TemporaryFile(mode="w+b") as tfile:
            _redirect_stdout(tfile.fileno())
            try:
                # Yield to caller
                yield
            finally:
                # Always point stdout back at the saved fd, otherwise an exception
                # in the caller's body would leave stdout attached to the temp file.
                _redirect_stdout(saved_stdout_fd)
            # Copy contents of temporary file to the given stream
            tfile.flush()
            tfile.seek(0, io.SEEK_SET)
            stream.write(tfile.read())
    finally:
        os.close(saved_stdout_fd)


def load_json_from_path(json_path: Path):
with gzip.open(json_path, "r") as compressed_report:
try:
Expand Down
13 changes: 13 additions & 0 deletions capa/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import idc
import idaapi
import ida_ida
import ida_nalt
import idautils
import ida_bytes
import ida_loader
Expand Down Expand Up @@ -64,6 +65,12 @@ def is_64bit() -> bool:
info: idaapi.idainfo = idaapi.get_inf_structure()
return info.is_64bit()

def retrieve_input_file_md5() -> str:
    """Return the input file's MD5 exactly as `ida_nalt.retrieve_input_file_md5()` reports it."""
    digest = ida_nalt.retrieve_input_file_md5()
    return digest

def retrieve_input_file_sha256() -> str:
    """Return the input file's SHA-256 exactly as `ida_nalt.retrieve_input_file_sha256()` reports it."""
    digest = ida_nalt.retrieve_input_file_sha256()
    return digest

else:

def get_filetype() -> "ida_ida.filetype_t":
Expand All @@ -78,6 +85,12 @@ def is_32bit() -> bool:
def is_64bit() -> bool:
return idaapi.inf_is_64bit()

def retrieve_input_file_md5() -> str:
    """Return the input file's MD5 as the `.hex()` form of the value from `ida_nalt`."""
    # NOTE(review): presumably ida_nalt returns raw digest bytes in this branch - confirm
    raw_digest = ida_nalt.retrieve_input_file_md5()
    return raw_digest.hex()

def retrieve_input_file_sha256() -> str:
    """Return the input file's SHA-256 as the `.hex()` form of the value from `ida_nalt`."""
    # NOTE(review): presumably ida_nalt returns raw digest bytes in this branch - confirm
    raw_digest = ida_nalt.retrieve_input_file_sha256()
    return raw_digest.hex()


def inform_user_ida_ui(message):
# this isn't a logger, this is IDA's logging facility
Expand Down
32 changes: 32 additions & 0 deletions capa/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import logging
Expand Down Expand Up @@ -69,6 +70,7 @@
BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"


class CorruptFile(ValueError):
Expand Down Expand Up @@ -321,6 +323,36 @@ def get_extractor(

return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)

elif backend == BACKEND_IDA:
import capa.features.extractors.ida.idalib as idalib

if not idalib.has_idalib():
raise RuntimeError(
"cannot find IDA idalib module."
)

if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")

import ida
import ida_auto

import capa.features.extractors.ida.extractor

logger.debug("idalib: opening database...")
# idalib writes to stdout (ugh), so we have to capture that
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
with console.status("analyzing program...", spinner="dots"):
if ida.open_database(str(input_path), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")

logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")

return capa.features.extractors.ida.extractor.IdaFeatureExtractor()

else:
raise ValueError("unexpected backend: " + backend)

Expand Down
2 changes: 2 additions & 0 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.loader import (
BACKEND_IDA,
BACKEND_VIV,
BACKEND_CAPE,
BACKEND_BINJA,
Expand Down Expand Up @@ -283,6 +284,7 @@ def install_common_args(parser, wanted=None):
backends = [
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
(BACKEND_VIV, "vivisect"),
(BACKEND_IDA, "IDA via idalib"),
(BACKEND_PEFILE, "pefile (file features only)"),
(BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"),
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ known_first_party = [
"ghidra",
"ida",
"ida_ida",
"ida_auto",
"ida_bytes",
"ida_entry",
"ida_funcs",
Expand Down
Loading

0 comments on commit ac94f87

Please sign in to comment.