binary ninja: search for API using XDG desktop entry

ref #2376
mandiant · Sep 20, 2024 · e70225b · e70225b
1 parent 02b5930
commit e70225b
Show file tree

Hide file tree

Showing 5 changed files with 166 additions and 151 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### New Features
 
 - add IDA v9.0 backend via idalib #2376 @williballenthin
+- locate Binary Ninja API using XDG Desktop Entries #2376 @williballenthin
 
 ### Breaking Changes
 

diff --git a/capa/features/extractors/binja/find_binja_api.py b/capa/features/extractors/binja/find_binja_api.py
@@ -5,16 +5,24 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import os
+import sys
+import logging
 import subprocess
+import importlib.util
+from typing import Optional
 from pathlib import Path
 
+logger = logging.getLogger(__name__)
+
+
 # When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
 # we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
 # to find out the path of the binaryninja module that has been installed.
 # Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
 # find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
 # binaryninja module is extracted by the PyInstaller.
-code = r"""
+CODE = r"""
 from pathlib import Path
 from importlib import util
 spec = util.find_spec('binaryninja')
@@ -26,10 +34,141 @@
 """
 
 
-def find_binja_path() -> Path:
-    raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
-    return Path(bytes.fromhex(raw_output).decode("utf8"))
+def find_binaryninja_path_via_subprocess() -> Optional[Path]:
+    """
+    Ask the system `python` interpreter for the Binary Ninja API location.
+
+    Runs the CODE snippet in a subprocess and decodes its hex-encoded stdout.
+
+    Returns:
+      the reported path, or None when the interpreter cannot be started,
+      the snippet exits with a failure status, or the output is empty or undecodable.
+    """
+    try:
+        raw_output = subprocess.check_output(["python", "-c", CODE]).decode("ascii").strip()
+        output = bytes.fromhex(raw_output).decode("utf8")
+    except (OSError, subprocess.CalledProcessError, ValueError) as e:
+        # OSError: no `python` on PATH (or it is not executable).
+        # CalledProcessError: the snippet exited with a non-zero status.
+        # ValueError: stdout was not ASCII/hex/UTF-8 (includes UnicodeDecodeError).
+        # report "not found" so that callers can fall back to other search strategies.
+        logger.debug("failed to query system python for Binary Ninja API: %s", e)
+        return None
+
+    if not output.strip():
+        return None
+    return Path(output)
+
+
+def get_desktop_entry(name: str) -> Optional[Path]:
+    """
+    Find the path for the given XDG Desktop Entry name.
+
+    Searches `$XDG_DATA_HOME/applications` first and then the `applications`
+    directory of each entry in `$XDG_DATA_DIRS`, using the defaults from the
+    XDG Base Directory specification when a variable is unset or empty.
+
+    Like:
+
+        >> get_desktop_entry("com.vector35.binaryninja.desktop")
+        Path("~/.local/share/applications/com.vector35.binaryninja.desktop")
+
+    Returns:
+      the path of the first matching desktop entry file, or None if there is none.
+    """
+    assert sys.platform in ("linux", "linux2")
+    assert name.endswith(".desktop")
+
+    # `or` rather than a .get() default: the spec treats an empty value like an unset one.
+    data_home = os.environ.get("XDG_DATA_HOME") or str(Path.home() / ".local" / "share")
+    data_dirs = os.environ.get("XDG_DATA_DIRS") or "/usr/local/share:/usr/share"
+
+    # the user-specific directory takes precedence over the system-wide ones.
+    for data_dir in [data_home, *data_dirs.split(":")]:
+        if not data_dir:
+            # skip empty components (e.g. "a::b") so we never probe ./applications
+            continue
+
+        application = Path(data_dir) / "applications" / name
+        if application.is_file():
+            return application
+
+    return None
+
+
+def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
+    """
+    Extract the Binary Ninja installation directory from a desktop entry file.
+
+    Reads the `Exec` key and returns the directory containing the
+    `binaryninja` executable, like:
+
+        from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
+        to:        /home/wballenthin/software/binaryninja/
+
+    Any trailing arguments or field codes (`%u`, `%U`, none at all) are ignored.
+
+    Returns:
+      the installation directory, or None when no `Exec` line names an absolute
+      path to `binaryninja` inside an existing directory.
+    """
+    import shlex
+
+    for line in desktop_entry.read_text(encoding="utf-8").splitlines():
+        key, _, value = line.partition("=")
+        if key.strip() != "Exec":
+            continue
+
+        try:
+            argv = shlex.split(value)
+        except ValueError:
+            # unbalanced quoting: not a command line we can interpret.
+            continue
+
+        if not argv:
+            continue
+
+        executable = Path(argv[0])
+        # a bare "binaryninja" is resolved via $PATH and tells us nothing about
+        # where the installation lives, so require an absolute path.
+        if executable.name != "binaryninja" or not executable.is_absolute():
+            continue
+
+        binaryninja_path = executable.parent
+        if binaryninja_path.is_dir():
+            return binaryninja_path
+
+    return None
+
+
+def validate_binaryninja_path(binaryninja_path: Path) -> bool:
+    """
+    Does the given directory look like a Binary Ninja installation?
+
+    It must contain a `python` directory that holds the `binaryninja`
+    package, i.e. `python/binaryninja/__init__.py`.
+    """
+    if not binaryninja_path:
+        return False
+
+    python_dir = binaryninja_path / "python"
+    package_init = python_dir / "binaryninja" / "__init__.py"
+    return python_dir.is_dir() and package_init.is_file()
+
+
+def find_binaryninja() -> Optional[Path]:
+    """
+    Locate the directory that holds the Binary Ninja Python API.
+
+    First asks the system Python (via subprocess); if that yields nothing
+    usable, falls back to the XDG desktop entry on Linux.
+
+    Returns:
+      the `python` directory of the installation, or None if not found.
+
+    Raises:
+      NotImplementedError: the fallback is needed on a non-Linux platform.
+    """
+    install_dir = find_binaryninja_path_via_subprocess()
+    if not (install_dir and validate_binaryninja_path(install_dir)):
+        # the desktop entry lookup is only implemented for Linux.
+        if sys.platform not in ("linux", "linux2"):
+            raise NotImplementedError(f"unsupported platform: {sys.platform}")
+        logger.debug("detected OS: linux")
+
+        entry = get_desktop_entry("com.vector35.binaryninja.desktop")
+        if not entry:
+            return None
+        logger.debug("found Binary Ninja application: %s", entry)
+
+        install_dir = get_binaryninja_path(entry)
+        if not install_dir or not validate_binaryninja_path(install_dir):
+            return None
+
+    logger.debug("found Binary Ninja installation: %s", install_dir)
+
+    return install_dir / "python"
+
+
+def is_binaryninja_installed() -> bool:
+    """Can the import system find a spec for the binaryninja module?"""
+    try:
+        spec = importlib.util.find_spec("binaryninja")
+    except ModuleNotFoundError:
+        return False
+    return spec is not None
+
+
+def has_binaryninja() -> bool:
+    """
+    Is the Binary Ninja API either importable already or discoverable on disk?
+
+    Returns:
+      True when the module is installed or find_binaryninja() locates it, else False.
+      Exceptions raised by find_binaryninja() are not handled here.
+    """
+    if is_binaryninja_installed():
+        logger.debug("found installed Binary Ninja API")
+        return True
+
+    logger.debug("Binary Ninja API not installed, searching...")
+
+    binaryninja_path = find_binaryninja()
+    if not binaryninja_path:
+        logger.debug("failed to find Binary Ninja installation")
+        # return here so we don't also log "found Binary Ninja API: None" below.
+        return False
+
+    logger.debug("found Binary Ninja API: %s", binaryninja_path)
+    return True
+
+
+def load_binaryninja() -> bool:
+    """
+    Try to import the binaryninja module, searching for it if necessary.
+
+    When the plain import fails, the directory reported by find_binaryninja()
+    is appended to sys.path and the import is attempted once more.
+
+    Returns:
+      True if the module could be imported, False otherwise.
+    """
+    try:
+        import binaryninja  # noqa: F401 unused import
+    except ImportError:
+        # not importable as-is: go looking for an installation.
+        api_dir = find_binaryninja()
+    else:
+        return True
+
+    if not api_dir:
+        return False
+
+    sys.path.append(api_dir.absolute().as_posix())
+    try:
+        import binaryninja  # noqa: F401 unused import
+    except ImportError:
+        return False
+    return True
 
 
 if __name__ == "__main__":
-    print(find_binja_path())
+    print(find_binaryninja_path_via_subprocess())
diff --git a/capa/helpers.py b/capa/helpers.py
@@ -5,14 +5,14 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import os
 import io
+import os
 import sys
 import gzip
 import ctypes
 import inspect
-import tempfile
 import logging
+import tempfile
 import contextlib
 import importlib.util
 from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
@@ -87,6 +87,7 @@ def assert_never(value) -> NoReturn:
 # Redirect stdout at the C runtime level,
 # which lets us handle native libraries that spam stdout.
 # via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
+# TODO: this only works on POSIX
 LIBC = ctypes.CDLL(None)
 C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")
 
@@ -105,13 +106,13 @@ def _redirect_stdout(to_fd):
         # Make original_stdout_fd point to the same file as to_fd
         os.dup2(to_fd, original_stdout_fd)
         # Create a new sys.stdout that points to the redirected fd
-        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, 'wb'))
+        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))
 
     # Save a copy of the original stdout fd in saved_stdout_fd
     saved_stdout_fd = os.dup(original_stdout_fd)
     try:
         # Create a temporary file and redirect stdout to it
-        tfile = tempfile.TemporaryFile(mode='w+b')
+        tfile = tempfile.TemporaryFile(mode="w+b")
         _redirect_stdout(tfile.fileno())
         # Yield to caller, then redirect stdout back to the saved fd
         yield

diff --git a/capa/loader.py b/capa/loader.py
@@ -7,7 +7,6 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import os
-import sys
 import logging
 import datetime
 import contextlib
@@ -239,25 +238,18 @@ def get_extractor(
         return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)
 
     elif backend == BACKEND_BINJA:
-        import capa.helpers
-        from capa.features.extractors.binja.find_binja_api import find_binja_path
-
-        # When we are running as a standalone executable, we cannot directly import binaryninja
-        # We need to fist find the binja API installation path and add it into sys.path
-        if capa.helpers.is_running_standalone():
-            bn_api = find_binja_path()
-            if bn_api.exists():
-                sys.path.append(str(bn_api))
-
-        try:
-            import binaryninja
-            from binaryninja import BinaryView
-        except ImportError:
+        import capa.features.extractors.binja.find_binja_api as finder
+
+        if not finder.has_binaryninja():
             raise RuntimeError(
-                "Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
-                + "https://docs.binary.ninja/dev/batch.html#install-the-api)."
+                "cannot find Binary Ninja API module."
             )
 
+        if not finder.load_binaryninja():
+            raise RuntimeError("failed to load Binary Ninja API module.")
+
+        import binaryninja
+
         import capa.features.extractors.binja.extractor
 
         if input_format not in (FORMAT_SC32, FORMAT_SC64):
@@ -271,7 +263,7 @@ def get_extractor(
                 raise UnsupportedOSError()
 
         with console.status("analyzing program...", spinner="dots"):
-            bv: BinaryView = binaryninja.load(str(input_path))
+            bv: binaryninja.BinaryView = binaryninja.load(str(input_path))
             if bv is None:
                 raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
 
@@ -328,12 +320,11 @@ def get_extractor(
 
         if not idalib.has_idalib():
             raise RuntimeError(
-                # TODO(williballenthin): add more details here
-                "cannot find IDA idalib  module."
+                "cannot find IDA idalib module."
             )
 
         if not idalib.load_idalib():
-            raise RuntimeError("failed to load IDA idalib  module.")
+            raise RuntimeError("failed to load IDA idalib module.")
 
         import ida
         import ida_auto
@@ -344,8 +335,9 @@ def get_extractor(
         # idalib writes to stdout (ugh), so we have to capture that
         # so as not to screw up structured output.
         with capa.helpers.stdout_redirector(io.BytesIO()):
-            if ida.open_database(str(input_path), run_auto_analysis=True):
-                raise RuntimeError("failed to analyze input file")
+            with console.status("analyzing program...", spinner="dots"):
+                if ida.open_database(str(input_path), run_auto_analysis=True):
+                    raise RuntimeError("failed to analyze input file")
 
             logger.debug("idalib: waiting for analysis...")
             ida_auto.auto_wait()