Skip to content

Commit

Permalink
Switch vendoring from pip install ... to Pex install. (#2306)
Browse files Browse the repository at this point in the history
Pex now only uses Pip for resolving distributions (`pip download ...`)
and building wheels (`pip wheel ...`); installation of wheels in chroots
and venvs is now completely handled by Pex everywhere in the code base.
  • Loading branch information
jsirois authored Dec 17, 2023
1 parent 7a2cee3 commit 5a42abf
Show file tree
Hide file tree
Showing 31 changed files with 167 additions and 378 deletions.
252 changes: 2 additions & 250 deletions pex/pep_376.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,16 @@
import json
import os
import shutil
from contextlib import closing
from fileinput import FileInput

from pex import hashing
from pex.common import is_pyc_dir, is_pyc_file, is_python_script, safe_mkdir, safe_open
from pex.compatibility import get_stdout_bytes_buffer, urlparse
from pex.dist_metadata import Distribution, EntryPoint, MetadataFiles, MetadataType
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
from pex.interpreter import PythonInterpreter
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.typing import TYPE_CHECKING, cast
from pex.venv.virtualenv import Virtualenv

if TYPE_CHECKING:
from typing import (
Callable,
Container,
Dict,
Iterable,
Iterator,
Optional,
Protocol,
Text,
Tuple,
Union,
)
from typing import Callable, Iterable, Iterator, Optional, Protocol, Text, Tuple, Union

import attr # vendor:skip

Expand Down Expand Up @@ -483,240 +467,8 @@ def read(
size = int(file_size) if file_size else None
yield InstalledFile(path=path, hash=file_hash, size=size)

@staticmethod
def _find_installation(
    prefix_dir,  # type: str
    project_name,  # type: str
    version,  # type: str
):
    # type: (...) -> Optional[MetadataFiles]
    """Locate the `.dist-info` metadata of an installed project under `prefix_dir`.

    :param prefix_dir: The root directory the wheel was installed under.
    :param project_name: The name of the installed project.
    :param version: The version of the installed project.
    :return: The metadata files of the first matching installation or `None` if no
        `site-packages` directory under `prefix_dir` holds a matching installation.
    """
    canonical_project_name = ProjectName(project_name)
    canonical_version = Version(version)

    # Not every directory named `site-packages` is the real "purelib" / "platlib" install path:
    # some distributions in the wild (namely python-certifi-win32 1.6.1, see:
    # https://github.com/pantsbuild/pex/issues/1861) ship their own directory of that name.
    # Rather than trusting sysconfig, gather every `site-packages` directory below `prefix_dir`
    # and probe each one. A `pip install --prefix <prefix_dir> --no-deps <wheel file>` should
    # only ever produce a single matching installation, so taking the first match is safe.
    candidates = []
    for root, dirs, _ in os.walk(prefix_dir):
        if "site-packages" in dirs:
            candidates.append(os.path.join(root, "site-packages"))

    for candidate in candidates:
        found = MetadataType.DIST_INFO.load_metadata(
            candidate, project_name=canonical_project_name
        )
        if found and canonical_version == found.metadata.version:
            return found
    return None

@classmethod
def from_pip_prefix_install(
    cls,
    prefix_dir,  # type: str
    project_name,  # type: str
    version,  # type: str
):
    # type: (...) -> Record
    """Create a record for a project installed under `prefix_dir`.

    :param prefix_dir: The root directory the wheel was installed under.
    :param project_name: The name of the installed project.
    :param version: The version of the installed project.
    :return: A record pointing at the installation's `RECORD` file.
    :raises RecordNotFoundError: If either the project metadata or its `RECORD` file cannot
        be found.
    """
    installation = cls._find_installation(prefix_dir, project_name, version)
    if not installation:
        missing_metadata = (
            "Could not find project metadata for {project_name} {version} under "
            "{prefix_dir}"
        )
        raise RecordNotFoundError(
            missing_metadata.format(
                project_name=project_name, version=version, prefix_dir=prefix_dir
            )
        )

    record_relpath = installation.metadata_file_rel_path("RECORD")
    if not record_relpath:
        missing_record = (
            "Could not find the installation RECORD for {project_name} {version} under "
            "{location}"
        )
        raise RecordNotFoundError(
            missing_record.format(
                project_name=project_name,
                version=version,
                location=installation.metadata.location,
            )
        )

    # The base dir is stored relative to the prefix dir.
    return cls(
        project_name=project_name,
        version=version,
        prefix_dir=prefix_dir,
        rel_base_dir=os.path.relpath(installation.metadata.location, prefix_dir),
        relative_path=record_relpath,
    )

# The name of the installed project, as passed to `from_pip_prefix_install`.
project_name = attr.ib() # type: str
# The version of the installed project, as passed to `from_pip_prefix_install`.
version = attr.ib() # type: str
# The root directory the wheel was installed under.
prefix_dir = attr.ib() # type: str
# The location of the project metadata, relative to `prefix_dir`.
rel_base_dir = attr.ib() # type: Text
# The path of the `RECORD` file; joined onto `prefix_dir` / `rel_base_dir` to locate it on disk.
relative_path = attr.ib() # type: Text

def _find_dist_info_file(self, filename):
    # type: (str) -> Optional[DistInfoFile]
    """Look up a metadata file of this installation by name.

    :param filename: The name of the metadata file to find; e.g.: `entry_points.txt`.
    :return: The path and content of the file, or `None` if the project metadata, the file's
        relative path or the file's content cannot be found.
    """
    base_dir = os.path.join(self.prefix_dir, self.rel_base_dir)
    metadata_files = MetadataType.DIST_INFO.load_metadata(
        location=base_dir, project_name=ProjectName(self.project_name)
    )
    if metadata_files is None:
        return None

    rel_path = metadata_files.metadata_file_rel_path(filename)
    if rel_path is None:
        return None

    content = metadata_files.read(filename)
    if content is None:
        return None

    return DistInfoFile(
        path=os.path.join(metadata_files.metadata.location, rel_path), content=content
    )

def fixup_install(
    self,
    exclude=(),  # type: Container[str]
    interpreter=None,  # type: Optional[PythonInterpreter]
):
    # type: (...) -> InstalledWheel
    """Rearrange a wheel install so that it is reproducible and importable.

    Once fixed up, this RECORD can be used to re-install the wheel in a venv with `reinstall`.

    :param exclude: Any top-level items to exclude.
    :param interpreter: The interpreter used to perform the wheel install.
    :return: The saved installed wheel layout.
    """
    self._fixup_scripts()
    self._fixup_direct_url()

    # PEX zipapp mode never reads the RECORD; only venv mode needs it. Its relative path
    # entries can differ between interpreters (notably pypy for Python < 3.8 has a custom
    # scheme), so drop the file here and let venv re-installs create it on-demand.
    record_path = os.path.join(self.prefix_dir, self.rel_base_dir, self.relative_path)
    os.unlink(record_path)

    # The installed wheel chroot layout being built; e.g.:
    #   .prefix/bin/...                     # scripts
    #   .prefix/include/site/pythonX.Y/...  # headers
    #   .prefix/share/...                   # data files
    #   greenlet/...                        # importables
    #   greenlet-1.1.2.dist-info/...        # importables
    stash_dir = ".prefix"
    prefix_stash = os.path.join(self.prefix_dir, stash_dir)
    safe_mkdir(prefix_stash)

    # 1. Stash every top-level item that is neither the stash itself nor excluded.
    for entry in os.listdir(self.prefix_dir):
        if entry != stash_dir and entry not in exclude:
            shutil.move(
                os.path.join(self.prefix_dir, entry), os.path.join(prefix_stash, entry)
            )

    # 2. Rename all `*/{python ver}` paths to their normalized `*/pythonX.Y` form. Renamed
    #    directories are pruned from the walk; only unchanged ones are descended into.
    for parent, subdirs, _ in os.walk(prefix_stash):
        unchanged = []
        for subdir in subdirs:
            current = os.path.join(parent, subdir)
            normalized = InstalledFile.normalized_path(current, interpreter=interpreter)
            if normalized == current:
                unchanged.append(subdir)
            else:
                shutil.move(current, normalized)
        subdirs[:] = unchanged

    # 3. Hoist the `site-packages` content back up to the prefix dir chroot so that it is
    #    importable when the chroot is added to the `sys.path` in PEX zipapp mode.
    importable_stash = InstalledFile.normalized_path(
        os.path.join(prefix_stash, self.rel_base_dir), interpreter=interpreter
    )
    for importable in os.listdir(importable_stash):
        source = os.path.join(importable_stash, importable)
        destination = os.path.join(self.prefix_dir, importable)
        shutil.move(source, destination)
    os.rmdir(importable_stash)

    return InstalledWheel.save(
        prefix_dir=self.prefix_dir,
        stash_dir=stash_dir,
        record_relpath=self.relative_path,
    )

def _fixup_scripts(self):
    # type: (...) -> None
    """Rewrite the headers of installed scripts in place so they start with `#!python`.

    Only files directly under `<prefix_dir>/bin` are considered. A file is rewritten if it is
    detected as a Python script or if its name matches a `console_scripts` entry point declared
    in the installation's `entry_points.txt`. Does nothing when there is no `bin` directory or
    no file qualifies.
    """
    bin_dir = os.path.join(self.prefix_dir, "bin")
    if not os.path.isdir(bin_dir):
        return

    console_scripts = {}  # type: Dict[Text, EntryPoint]
    entry_points_file = self._find_dist_info_file("entry_points.txt")
    if entry_points_file:
        console_scripts.update(
            Distribution.parse_entry_map(entry_points_file.content).get("console_scripts", {})
        )

    # Maps each script path to rewrite to the line that marks the end of its header: `None`
    # means only the first line is replaced and every other line is kept as-is.
    scripts = {}  # type: Dict[str, Optional[bytes]]
    for script_name in os.listdir(bin_dir):
        script_path = os.path.join(bin_dir, script_name)
        if is_python_script(script_path):
            scripts[script_path] = None
        elif script_name in console_scripts:
            # When a wheel is installed by Pip and that wheel contains console_scripts, they are
            # normally written with a faux-shebang of:
            #   #!python
            #
            # Pex relies on this hermetic shebang and only ever reifies it when creating venvs.
            #
            # If Pip is being run under a Python executable with a path length >127 characters
            # on Linux though, it writes a shebang / header of:
            #   #!/bin/sh
            #   '''exec' <too long path to Pip venv python> "$0" "$@"'
            #   ' '''
            #
            # That header is immediately followed by the expected console_script shim contents:
            #   # -*- coding: utf-8 -*-
            #   import re
            #   import sys
            #   from <ep_module> import <ep_func>
            #   if __name__ == '__main__':
            #       sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
            #       sys.exit(main())
            #
            # Instead of guessing that 127 characters is the shebang length limit and using
            # Pip's safety-hatch `/bin/sh` trick, we forcibly re-write the header to be just the
            # expected `#!python` shebang. We detect the end of the header with the known 1st
            # line of console_script shim ~code defined in
            # pex/vendor/_vendored/pip/pip/_vendor/distlib/scripts.py on line 41:
            # https://github.com/pantsbuild/pex/blob/196b4cd5b8dd4b4af2586460530e9a777262be7d/pex/vendor/_vendored/pip/pip/_vendor/distlib/scripts.py#L41
            scripts[script_path] = b"# -*- coding: utf-8 -*-"
    if not scripts:
        return

    # With `inplace=True`, FileInput redirects standard output into the file currently being
    # read, so whatever is written to stdout becomes that file's new content.
    with closing(FileInput(files=scripts.keys(), inplace=True, mode="rb")) as script_fi:
        first_non_shebang_line = None  # type: Optional[bytes]
        for line in script_fi:
            # Fetched per line since the redirected stdout changes from file to file.
            # NOTE(review): presumably this returns the bytes-level buffer of the current
            # `sys.stdout` - confirm in pex.compatibility.
            buffer = get_stdout_bytes_buffer()
            if script_fi.isfirstline():
                # Starting a new file: load its end-of-header marker (if any).
                first_non_shebang_line = scripts[script_fi.filename()]
                # Ensure python shebangs are reproducible. The only place these can be used is
                # in venv mode PEXes where the `#!python` placeholder shebang will be re-written
                # to use the venv's python interpreter.
                buffer.write(b"#!python\n")
            elif (
                not first_non_shebang_line
                or cast(bytes, line).strip() == first_non_shebang_line
            ):
                # Either there is no header left to skip or this line is the marker that ends
                # the header; lines before the marker match neither test and so are dropped.
                # N.B.: These lines include the newline already.
                buffer.write(cast(bytes, line))
                # Clear the marker so all remaining lines of this file are copied through.
                first_non_shebang_line = None

def _fixup_direct_url(self):
    # type: () -> None
    """Delete the installation's `direct_url.json` when it records a `file` scheme URL.

    Does nothing if the installation has no `direct_url.json` metadata file.
    """
    direct_url_file = self._find_dist_info_file("direct_url.json")
    if not direct_url_file:
        return

    direct_url = json.loads(direct_url_file.content.decode("utf-8"))
    if urlparse.urlparse(direct_url["url"]).scheme == "file":
        os.unlink(direct_url_file.path)
19 changes: 11 additions & 8 deletions pex/pep_427.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,13 +394,16 @@ def record_files(
print("pex", file=fp)
installed_files.append(InstalledWheel.create_installed_file(path=fp.name, dest_dir=dest))

if requested:
requested_path = os.path.join(dest, wheel.metadata_path("REQUESTED"))
touch(requested_path)
installed_files.append(
InstalledWheel.create_installed_file(path=requested_path, dest_dir=dest)
)
if interpreter:
# Finalize a proper venv install with REQUESTED and a RECORD to support un-installing.
if requested:
requested_path = os.path.join(dest, wheel.metadata_path("REQUESTED"))
touch(requested_path)
installed_files.append(
InstalledWheel.create_installed_file(path=requested_path, dest_dir=dest)
)

installed_files.append(InstalledFile(path=record_relpath, hash=None, size=None))
Record.write(dst=record_abspath, installed_files=installed_files)

installed_files.append(InstalledFile(path=record_relpath, hash=None, size=None))
Record.write(dst=record_abspath, installed_files=installed_files)
return wheel.metadata_files
2 changes: 1 addition & 1 deletion pex/vendor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def git(
rewrite=True, # type: bool
constraints=(), # type: Tuple[str, ...]
):
requirement = "git+{repo}@{commit}#egg={project_name}".format(
requirement = "{project_name} @ git+{repo}@{commit}".format(
repo=repo, commit=commit, project_name=project_name
)
if not prep_command:
Expand Down
Loading

0 comments on commit 5a42abf

Please sign in to comment.