From d0ea4fbf2408060a2e4e968adfcee634a7c354cd Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 23 Aug 2024 09:27:37 -0700 Subject: [PATCH] Add ability to repair wheels for other architectures --- src/auditwheel/elfutils.py | 36 +++++++ src/auditwheel/libc.py | 8 +- src/auditwheel/main_repair.py | 28 ++++-- src/auditwheel/main_show.py | 2 + src/auditwheel/musllinux.py | 41 ++++---- src/auditwheel/policy/__init__.py | 157 ++++++++++++++++++++++-------- tests/unit/test_musllinux.py | 22 ++--- tests/unit/test_policy.py | 16 +-- 8 files changed, 208 insertions(+), 102 deletions(-) diff --git a/src/auditwheel/elfutils.py b/src/auditwheel/elfutils.py index b09470f2..e0f4a0e4 100644 --- a/src/auditwheel/elfutils.py +++ b/src/auditwheel/elfutils.py @@ -5,9 +5,11 @@ from typing import Iterator from elftools.common.exceptions import ELFError +from elftools.elf.dynamic import DynamicSegment from elftools.elf.elffile import ELFFile from .lddtree import parse_ld_paths +from .libc import Libc def elf_read_dt_needed(fn: str) -> list[str]: @@ -161,3 +163,37 @@ def filter_undefined_symbols( if intersection: result[lib] = sorted(intersection) return result + + +def elf_get_platform_info(path: str) -> tuple[Libc | None, str | None]: + with open(path, "rb") as f: + try: + elf = ELFFile(f) + except ELFError: + return (None, None) + arch = { + "x64": "x86_64", + "x86": "i686", + "AArch64": "aarch64", + "64-bit PowerPC": "ppc64", + "IBM S/390": "s390x", + "ARM": "armv7l", + "RISC-V": "riscv64", + }[elf.get_machine_arch()] + if arch == "ppc64" and elf.header.e_ident.EI_DATA == "ELFDATA2LSB": + arch = "ppc64le" + + libc = None + for seg in elf.iter_segments(): + if not isinstance(seg, DynamicSegment): + continue + for tag in seg.iter_tags(): + if tag.entry.d_tag == "DT_NEEDED": + if tag.needed == "libc.so.6": + libc = Libc.GLIBC + break + if tag.needed.startswith("libc.musl-"): + libc = Libc.MUSL + break + break + return (libc, arch) diff --git a/src/auditwheel/libc.py 
b/src/auditwheel/libc.py index 89a6841e..7201336f 100644 --- a/src/auditwheel/libc.py +++ b/src/auditwheel/libc.py @@ -3,7 +3,6 @@ import logging from enum import IntEnum -from .error import InvalidLibc from .musllinux import find_musl_libc logger = logging.getLogger(__name__) @@ -15,10 +14,7 @@ class Libc(IntEnum): def get_libc() -> Libc: - try: - find_musl_libc() + if find_musl_libc() is not None: logger.debug("Detected musl libc") return Libc.MUSL - except InvalidLibc: - logger.debug("Falling back to GNU libc") - return Libc.GLIBC + return Libc.GLIBC diff --git a/src/auditwheel/main_repair.py b/src/auditwheel/main_repair.py index 4374f208..df218f2c 100644 --- a/src/auditwheel/main_repair.py +++ b/src/auditwheel/main_repair.py @@ -43,13 +43,19 @@ def configure_parser(sub_parsers): p.add_argument( "--plat", action=EnvironmentDefault, + required=False, metavar="PLATFORM", env="AUDITWHEEL_PLAT", dest="PLAT", help="Desired target platform. See the available platforms under the " - f'PLATFORMS section below. (default: "{highest_policy}")', + f'PLATFORMS section below. (default on current arch: "{highest_policy}")', choices=policy_names, - default=highest_policy, + ) + p.add_argument( + "--best-plat", + action="store_true", + dest="BEST_PLAT", + help="Automatically determine the best target platform.", ) p.add_argument( "-L", @@ -115,18 +121,27 @@ def execute(args, p): for wheel_file in args.WHEEL_FILE: if not isfile(wheel_file): p.error("cannot access %s. 
No such file" % wheel_file) + wheel_policy.set_platform_from_wheel(wheel_file) logger.info("Repairing %s", basename(wheel_file)) - if not exists(args.WHEEL_DIR): - os.makedirs(args.WHEEL_DIR) - try: wheel_abi = analyze_wheel_abi(wheel_policy, wheel_file, exclude) except NonPlatformWheel: logger.info(NonPlatformWheel.LOG_MESSAGE) return 1 + if args.BEST_PLAT: + if args.PLAT: + p.error("Cannot specify both --best-plat and --plat") + args.PLAT = wheel_abi.overall_tag + + if not exists(args.WHEEL_DIR): + os.makedirs(args.WHEEL_DIR) + + highest_policy = wheel_policy.get_policy_name(wheel_policy.priority_highest) + if args.PLAT is None: + args.PLAT = highest_policy policy = wheel_policy.get_policy_by_name(args.PLAT) reqd_tag = policy["priority"] @@ -134,7 +149,8 @@ def execute(args, p): msg = ( 'cannot repair "%s" to "%s" ABI because of the presence ' "of too-recent versioned symbols. You'll need to compile " - "the wheel on an older toolchain." % (wheel_file, args.PLAT) + "the wheel on an older toolchain or pick a newer platform." + % (wheel_file, args.PLAT) ) p.error(msg) diff --git a/src/auditwheel/main_show.py b/src/auditwheel/main_show.py index d2157ea6..33e6614b 100644 --- a/src/auditwheel/main_show.py +++ b/src/auditwheel/main_show.py @@ -34,6 +34,8 @@ def execute(args, p): if not isfile(args.WHEEL_FILE): p.error("cannot access %s. 
No such file" % args.WHEEL_FILE) + wheel_policy.set_platform_from_wheel(args.WHEEL_FILE) + try: winfo = analyze_wheel_abi(wheel_policy, args.WHEEL_FILE, frozenset()) except NonPlatformWheel: diff --git a/src/auditwheel/musllinux.py b/src/auditwheel/musllinux.py index d3e8ed38..79bc7b05 100644 --- a/src/auditwheel/musllinux.py +++ b/src/auditwheel/musllinux.py @@ -3,12 +3,10 @@ import logging import pathlib import re -import subprocess from typing import NamedTuple -from auditwheel.error import InvalidLibc - LOG = logging.getLogger(__name__) +VERSION_RE = re.compile(b"[^.](?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)\0") class MuslVersion(NamedTuple): @@ -17,31 +15,28 @@ class MuslVersion(NamedTuple): patch: int -def find_musl_libc() -> pathlib.Path: +def find_musl_libc(library_path: str | None = None) -> pathlib.Path | None: try: - (dl_path,) = list(pathlib.Path("/lib").glob("libc.musl-*.so.1")) + (dl_path,) = list(pathlib.Path(library_path or "/lib").glob("libc.musl-*.so.1")) except ValueError: - LOG.debug("musl libc not detected") - raise InvalidLibc + return None return dl_path -def get_musl_version(ld_path: pathlib.Path) -> MuslVersion: +def get_musl_version(ld_path: pathlib.Path) -> MuslVersion | None: try: - ld = subprocess.run( - [ld_path], check=False, errors="strict", stderr=subprocess.PIPE - ).stderr + with open(ld_path, "rb") as fp: + text = fp.read() except FileNotFoundError: - LOG.error("Failed to determine musl version", exc_info=True) - raise InvalidLibc - - match = re.search( - r"Version " r"(?P<major>\d+)." r"(?P<minor>\d+)." 
r"(?P<patch>\d+)", ld - ) - if not match: - raise InvalidLibc - - return MuslVersion( - int(match.group("major")), int(match.group("minor")), int(match.group("patch")) - ) + return None + + for match in VERSION_RE.finditer(text): + return MuslVersion( + int(match.group("major")), + int(match.group("minor")), + int(match.group("patch")), + ) + + LOG.error("Failed to determine musl version", exc_info=True) + return None diff --git a/src/auditwheel/policy/__init__.py b/src/auditwheel/policy/__init__.py index 7c9b1585..c5190453 100644 --- a/src/auditwheel/policy/__init__.py +++ b/src/auditwheel/policy/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import json import logging import platform as _platform_module @@ -11,7 +12,12 @@ from pathlib import Path from typing import Any, Generator -from auditwheel.elfutils import filter_undefined_symbols, is_subdir +from auditwheel.elfutils import ( + elf_get_platform_info, + filter_undefined_symbols, + is_subdir, +) +from auditwheel.wheeltools import InWheelCtx from ..libc import Libc, get_libc from ..musllinux import find_musl_libc, get_musl_version @@ -27,6 +33,17 @@ Libc.MUSL: _HERE / "musllinux-policy.json", } +ALL_ARCHES = [ + "x86_64", + "i686", + "aarch64", + "ppc64", + "ppc64le", + "s390x", + "armv7l", + "riscv64", +] + class WheelPolicies: def __init__( @@ -35,52 +52,88 @@ def __init__( libc: Libc | None = None, musl_policy: str | None = None, arch: str | None = None, + library_path: str | None = None, ) -> None: - if libc is None: - libc = get_libc() if musl_policy is None else Libc.MUSL - if libc != Libc.MUSL and musl_policy is not None: - raise ValueError(f"'musl_policy' shall be None for libc {libc.name}") - if libc == Libc.MUSL: - if musl_policy is None: - musl_version = get_musl_version(find_musl_libc()) - musl_policy = f"musllinux_{musl_version.major}_{musl_version.minor}" - elif _MUSL_POLICY_RE.match(musl_policy) is None: - raise ValueError(f"Invalid 'musl_policy': '{musl_policy}'") - if 
arch is None: - arch = get_arch_name() - policies = json.loads(_POLICY_JSON_MAP[libc].read_text()) - self._policies = [] + self._policies: list[dict[str, Any]] = [] self._arch_name = arch self._libc_variant = libc self._musl_policy = musl_policy + self._library_path = library_path + self._reload_policies() + + def _reload_policies(self): + self._policies.clear() + + if self._libc_variant is None: + self._libc_variant = get_libc() if self._musl_policy is None else Libc.MUSL + + if ( + self._libc_variant is not None + and self._libc_variant != Libc.MUSL + and self._musl_policy is not None + ): + raise ValueError( + f"'musl_policy' shall be None for libc {self._libc_variant.name}" + ) + + if self._libc_variant == Libc.MUSL: + if self._musl_policy is None: + libc_path = find_musl_libc(self._library_path) + if libc_path is not None: + musl_version = get_musl_version(libc_path) + self._musl_policy = ( + f"musllinux_{musl_version.major}_{musl_version.minor}" + ) + elif _MUSL_POLICY_RE.match(self._musl_policy) is None: + raise ValueError(f"Invalid 'musl_policy': '{self._musl_policy}'") - _validate_pep600_compliance(policies) - for policy in policies: - if self._musl_policy is not None and policy["name"] not in { - "linux", - self._musl_policy, - }: + for libc, policy_path in _POLICY_JSON_MAP.items(): + if self._libc_variant is not None and self._libc_variant != libc: continue - if ( - self._arch_name in policy["symbol_versions"].keys() - or policy["name"] == "linux" - ): - if policy["name"] != "linux": - policy["symbol_versions"] = policy["symbol_versions"][ - self._arch_name - ] - policy["name"] = policy["name"] + "_" + self._arch_name - policy["aliases"] = [ - alias + "_" + self._arch_name for alias in policy["aliases"] - ] - policy["lib_whitelist"] = _fixup_musl_libc_soname( - libc, arch, policy["lib_whitelist"] + policies = json.loads(policy_path.read_text()) + _validate_pep600_compliance(policies) + for policy in policies: + if self._musl_policy is not None and 
policy["name"] not in { + "linux", + self._musl_policy, + }: + continue + versioning = ( + policy["symbol_versions"] + if policy["name"] != "linux" + else {arch: [] for arch in ALL_ARCHES} ) - self._policies.append(policy) + for arch, versions in versioning.items(): + if self._arch_name is not None and self._arch_name != arch: + continue + archpolicy = copy.copy(policy) + + archpolicy["arch"] = arch + if archpolicy["name"] != "linux": + archpolicy["symbol_versions"] = versions + archpolicy["name"] = archpolicy["name"] + "_" + arch + archpolicy["aliases"] = [ + alias + "_" + arch for alias in archpolicy["aliases"] + ] + archpolicy["lib_whitelist"] = _fixup_musl_libc_soname( + libc, arch, archpolicy["lib_whitelist"] + ) + self._policies.append(archpolicy) - if self._libc_variant == Libc.MUSL: + if self._libc_variant == Libc.MUSL and self._arch_name is not None: assert len(self._policies) == 2, self._policies + def set_platform_from_wheel(self, wheel_path: str): + with InWheelCtx(wheel_path) as ctx: + for file_path in ctx.iter_files(): + libc, arch = elf_get_platform_info(file_path) + if arch is not None: + if libc is not None: + self._libc_variant = libc + self._arch_name = arch + self._reload_policies() + break + @property def policies(self): return self._policies @@ -103,13 +156,35 @@ def get_policy_by_name(self, name: str) -> dict | None: raise RuntimeError("Internal error. Policies should be unique") return matches[0] - def get_policy_name(self, priority: int) -> str | None: + def get_policy_name( + self, priority: int, default_arch: str | None = None + ) -> str | None: matches = [p["name"] for p in self._policies if p["priority"] == priority] if len(matches) == 0: return None - if len(matches) > 1: - raise RuntimeError("Internal error. 
priorities should be unique") - return matches[0] + if len(matches) == 1: + return matches[0] + if default_arch is not None: + matches2 = [p for p in matches if p.endswith(default_arch)] + if matches2: + if len(matches2) > 1: + raise RuntimeError("Internal error. Priorities should be unique.") + return matches2[0] + host_arch = get_arch_name() + matches2 = [p for p in matches if p.endswith(host_arch)] + if matches2: + if len(matches2) > 1: + raise RuntimeError("Internal error. Priorities should be unique.") + return matches2[0] + for ordered_arch in ALL_ARCHES: + matches2 = [p for p in matches if p.endswith(ordered_arch)] + if matches2: + if len(matches2) > 1: + raise RuntimeError("Internal error. Priorities should be unique.") + return matches2[0] + raise RuntimeError( + "Internal error. Every policy should have a known architecture." + ) def get_priority_by_name(self, name: str) -> int | None: policy = self.get_policy_by_name(name) diff --git a/tests/unit/test_musllinux.py b/tests/unit/test_musllinux.py index 9f33591b..f04ac4fc 100644 --- a/tests/unit/test_musllinux.py +++ b/tests/unit/test_musllinux.py @@ -1,19 +1,15 @@ from __future__ import annotations -import subprocess +from io import BytesIO from unittest.mock import patch -import pytest - -from auditwheel.error import InvalidLibc from auditwheel.musllinux import find_musl_libc, get_musl_version @patch("auditwheel.musllinux.pathlib.Path") def test_find_musllinux_not_found(path_mock): path_mock.return_value.glob.return_value = [] - with pytest.raises(InvalidLibc): - find_musl_libc() + assert find_musl_libc() is None @patch("auditwheel.musllinux.pathlib.Path") @@ -24,20 +20,18 @@ def test_find_musllinux_found(path_mock): def test_get_musl_version_invalid_path(): - with pytest.raises(InvalidLibc): - get_musl_version("/tmp/no/executable/here") + assert get_musl_version("/tmp/no/executable/here") is None -@patch("auditwheel.musllinux.subprocess.run") +@patch("auditwheel.musllinux.open") def 
test_get_musl_version_invalid_version(run_mock): - run_mock.return_value = subprocess.CompletedProcess([], 1, None, "Version 1.1") - with pytest.raises(InvalidLibc): - get_musl_version("anything") + run_mock.return_value = BytesIO(b"jklasdfjkl Version 1.1") + assert get_musl_version("anything") is None -@patch("auditwheel.musllinux.subprocess.run") +@patch("auditwheel.musllinux.open") def test_get_musl_version_valid_version(run_mock): - run_mock.return_value = subprocess.CompletedProcess([], 1, None, "Version 5.6.7") + run_mock.return_value = BytesIO(b"jklasdfjkl Version 5.6.7\0 sjlkdfjkl") version = get_musl_version("anything") assert version.major == 5 assert version.minor == 6 diff --git a/tests/unit/test_policy.py b/tests/unit/test_policy.py index d05418f3..3f046dee 100644 --- a/tests/unit/test_policy.py +++ b/tests/unit/test_policy.py @@ -8,13 +8,11 @@ import pytest -from auditwheel.error import InvalidLibc from auditwheel.libc import Libc from auditwheel.policy import ( WheelPolicies, _validate_pep600_compliance, get_arch_name, - get_libc, get_replace_platforms, ) @@ -211,8 +209,8 @@ def test_get_by_priority_missing(self): def test_get_by_priority_duplicate(self): wheel_policy = WheelPolicies() wheel_policy._policies = [ - {"name": "duplicate", "priority": 0}, - {"name": "duplicate", "priority": 0}, + {"name": "duplicate_i686", "priority": 0}, + {"name": "duplicate_i686", "priority": 0}, ] with pytest.raises(RuntimeError): wheel_policy.get_policy_name(0) @@ -283,6 +281,7 @@ def test_filter_libs(self): (None, None, None, does_not_raise()), (Libc.GLIBC, None, None, does_not_raise()), (Libc.MUSL, "musllinux_1_1", None, does_not_raise()), + (Libc.MUSL, None, None, does_not_raise()), (None, "musllinux_1_1", None, does_not_raise()), (None, None, "aarch64", does_not_raise()), # invalid @@ -293,15 +292,8 @@ def test_filter_libs(self): raises(ValueError, "'musl_policy' shall be None"), ), (Libc.MUSL, "manylinux_1_1", None, raises(ValueError, "Invalid 'musl_policy'")), 
- (Libc.MUSL, "musllinux_5_1", None, raises(AssertionError)), + (Libc.MUSL, "musllinux_5_1", "x86_64", raises(AssertionError)), (Libc.MUSL, "musllinux_1_1", "foo", raises(AssertionError)), - # platform dependant - ( - Libc.MUSL, - None, - None, - does_not_raise() if get_libc() == Libc.MUSL else raises(InvalidLibc), - ), ], ids=ids, )