Skip to content

Commit

Permalink
Fix pex force local to handle PEP-420.
Browse files Browse the repository at this point in the history
Previously, pex would blow up trying to adjust import paths if a PEP-420
implicit namespace package was encountered. Add a test of the path
adjustment functionality both to confirm it was needed (it was) and that
the fix technique works with all forms of namespace packages by only
assuming they support `append`.

Fixes pex-tool#598
  • Loading branch information
jsirois committed Oct 29, 2018
1 parent 681a668 commit beb77bd
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 32 deletions.
53 changes: 53 additions & 0 deletions pex/bootstrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# coding=utf-8
# Copyright 2018 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os
import pkgutil


def module_path(module_name):
"""Converts a python module name into a list of the components of its package hierarchy."""
return module_name.split('.')


class Bootstrap(object):
"""Supports introspection of the PEX bootstrap code."""

_INSTANCE = None

@classmethod
def locate(cls):
"""Locates the active PEX bootstrap."""
if cls._INSTANCE is None:
bootstrap_path = __file__
module_import_path = module_path(__name__)
pex_package = module_import_path[0]

# For example, our __file__ might be requests.pex/.bootstrap/_pex/bootstrap.pyc and our import
# path _pex.bootstrap; so we walk back through all the module components of our import path to
# find the base sys.path entry where we were found (requests.pex/.bootstrap in this example).
for _ in module_import_path:
bootstrap_path = os.path.dirname(bootstrap_path)

cls._INSTANCE = cls(pex_module_name=pex_package, path_entry=bootstrap_path)
return cls._INSTANCE

def __init__(self, pex_module_name, path_entry):
self._pex_module_name = pex_module_name
self._path_entry = path_entry

@property
def pex_module_name(self):
"""Returns the name of the root `pex` module in the context of the bootstrap."""
return self._pex_module_name

@property
def path_entry(self):
"""Returns the sys.path entry that contains the PEX bootstrap code."""
return self._path_entry

def iter_root_module_names(self):
"""Returns an iterator over the root modules found in the PEX bootstrap."""
for _, module_name, _ in pkgutil.iter_modules([self.path_entry]):
yield module_name
48 changes: 34 additions & 14 deletions pex/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

from __future__ import absolute_import, print_function

import importlib
import itertools
import os
import site
import sys
import uuid

from .bootstrap import Bootstrap, module_path
from .common import die, open_zip, rename_if_empty, safe_mkdir, safe_rmtree
from .interpreter import PythonInterpreter
from .package import distribution_compatible
Expand All @@ -22,18 +24,18 @@

class PEXEnvironment(object):
@classmethod
def force_local(cls, pex, pex_info):
def force_local(cls, pex_file, pex_info):
if pex_info.code_hash is None:
# Do not support force_local if code_hash is not set. (It should always be set.)
return pex
return pex_file
explode_dir = os.path.join(pex_info.zip_unsafe_cache, pex_info.code_hash)
TRACER.log('PEX is not zip safe, exploding to %s' % explode_dir)
if not os.path.exists(explode_dir):
explode_tmp = explode_dir + '.' + uuid.uuid4().hex
with TRACER.timed('Unzipping %s' % pex):
with TRACER.timed('Unzipping %s' % pex_file):
try:
safe_mkdir(explode_tmp)
with open_zip(pex) as pex_zip:
with open_zip(pex_file) as pex_zip:
pex_files = (x for x in pex_zip.namelist()
if not x.startswith(PEXBuilder.BOOTSTRAP_DIR) and
not x.startswith(PexInfo.INTERNAL_CACHE))
Expand All @@ -46,15 +48,32 @@ def force_local(cls, pex, pex_info):
return explode_dir

@classmethod
def update_module_paths(cls, new_code_path):
# Force subsequent imports to come from the .pex directory rather than the .pex file.
TRACER.log('Adding to the head of sys.path: %s' % new_code_path)
sys.path.insert(0, new_code_path)
for name, module in sys.modules.items():
if hasattr(module, "__path__"):
module_dir = os.path.join(new_code_path, *name.split("."))
TRACER.log('Adding to the head of %s.__path__: %s' % (module.__name__, module_dir))
module.__path__.insert(0, module_dir)
def update_module_paths(cls, pex_file, explode_dir):
# Force subsequent imports to come from the exploded .pex directory rather than the .pex file.
TRACER.log('Adding to the head of sys.path: %s' % explode_dir)
sys.path.insert(0, explode_dir)

bootstrap = Bootstrap.locate()
bootstrap_module_names = frozenset(bootstrap.iter_root_module_names())

def reimport(module_name):
if module_path(module_name)[0] in bootstrap_module_names:
TRACER.log('Not reimporting bootstrap module %s' % module_name, V=2)
return

existing_module = sys.modules.pop(module_name)
TRACER.log('Reimporting %s loaded via %r from exploded pex'
% (module_name, existing_module.__path__))
reimported_module = importlib.import_module(module_name)
for item in existing_module.__path__:
reimported_module.__path__.append(item)

# And re-import any packages already loaded from within the .pex file.
pex_path = os.path.realpath(pex_file)
for name, module in list(sys.modules.items()):
if hasattr(module, '__path__') and any(os.path.realpath(path_item).startswith(pex_path)
for path_item in module.__path__):
reimport(name)

@classmethod
def write_zipped_internal_cache(cls, pex, pex_info):
Expand Down Expand Up @@ -192,7 +211,8 @@ def _activate(self):
self.update_candidate_distributions(self.load_internal_cache(self._pex, self._pex_info))

if not self._pex_info.zip_safe and os.path.isfile(self._pex):
self.update_module_paths(self.force_local(self._pex, self._pex_info))
explode_dir = self.force_local(pex_file=self._pex, pex_info=self._pex_info)
self.update_module_paths(pex_file=self._pex, explode_dir=explode_dir)

all_reqs = [pkg_resources.Requirement.parse(req) for req in self._pex_info.requirements]

Expand Down
26 changes: 9 additions & 17 deletions pex/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from __future__ import absolute_import, print_function

import os
import pkgutil
import sys
from distutils import sysconfig
from site import USER_SITE

from .bootstrap import Bootstrap
from .common import die
from .compatibility import exec_function
from .environment import PEXEnvironment
Expand Down Expand Up @@ -158,7 +158,7 @@ def minimum_sys_modules(cls, site_libs, modules=None):
metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace
packages to leak into imports despite being scrubbed from sys.path.
NOTE: This method mutates modules' __path__ attributes in sys.module, so this is currently an
NOTE: This method mutates modules' __path__ attributes in sys.modules, so this is currently an
irreversible operation.
"""

Expand All @@ -171,7 +171,7 @@ def minimum_sys_modules(cls, site_libs, modules=None):
new_modules[module_name] = module
continue

# Unexpected objects, e.g. namespace packages, should just be dropped:
# Unexpected objects, e.g. PEP 420 namespace packages, should just be dropped.
if not isinstance(module.__path__, list):
TRACER.log('Dropping %s' % (module_name,), V=3)
continue
Expand Down Expand Up @@ -398,30 +398,22 @@ def _execute(self):
def demote_bootstrap(cls):
TRACER.log('Bootstrap complete, performing final sys.path modifications...')

bootstrap_path = __file__
module_import_path = __name__.split('.')
root_package = module_import_path[0]

# For example, our __file__ might be requests.pex/.bootstrap/_pex/pex.pyc and our import path
# _pex.pex; so we walk back through all the module components of our import path to find the
# base sys.path entry where we were found (requests.pex/.bootstrap in this example).
for _ in module_import_path:
bootstrap_path = os.path.dirname(bootstrap_path)
bootstrap_path_index = sys.path.index(bootstrap_path)
bootstrap = Bootstrap.locate()
bootstrap_path_index = sys.path.index(bootstrap.path_entry)

# Move the third party resources pex uses to the end of sys.path for the duration of the run to
# allow conflicting versions supplied by user dependencies to win during the course of the
# execution of user code.
for _, mod, _ in pkgutil.iter_modules([bootstrap_path]):
if mod != root_package: # We let _pex stay imported
for mod in bootstrap.iter_root_module_names():
if mod != bootstrap.pex_module_name: # We let _pex stay imported
TRACER.log('Un-importing third party bootstrap dependency %s from %s'
% (mod, bootstrap_path))
% (mod, bootstrap.path_entry))
sys.modules.pop(mod)
submod_prefix = mod + '.'
for submod in [m for m in sys.modules.keys() if m.startswith(submod_prefix)]:
sys.modules.pop(submod)
sys.path.pop(bootstrap_path_index)
sys.path.append(bootstrap_path)
sys.path.append(bootstrap.path_entry)

TRACER.log('PYTHONPATH contains:')
for element in sys.path:
Expand Down
87 changes: 86 additions & 1 deletion tests/test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import platform
import subprocess
from contextlib import contextmanager
from textwrap import dedent

import pytest

Expand All @@ -15,7 +16,7 @@
from pex.pex import PEX
from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo
from pex.testing import make_bdist, temporary_dir, temporary_filename
from pex.testing import make_bdist, temporary_content, temporary_dir, temporary_filename


@contextmanager
Expand Down Expand Up @@ -49,6 +50,90 @@ def test_force_local():
assert PEXEnvironment.force_local(pex_file, pb.info) == code_cache


def test_force_local_implicit_ns_packages_issues_598():
def create_foo_bar_setup(name, extra_args=None):
return dedent("""
from setuptools import setup
setup(
name=%r,
version='0.0.1',
packages=['foo', 'foo.bar'],
namespace_packages=['foo', 'foo.bar'],
%s
)
""" % (name, extra_args or ''))

content1 = {
'foo/bar/spam.py': 'identify = lambda: 42',
'setup.py': create_foo_bar_setup('foo-bar-spam')
}

content2 = {
'foo/bar/eggs.py': dedent("""
# NB: This only works when this content is unpacked loose on the filesystem!
def read_self():
with open(__file__) as fp:
return fp.read()
""")
}

content3 = {
'foobaz': dedent("""\
#!python
import sys
from foo.bar import baz
sys.exit(baz.main())
"""),
'foo/bar/baz.py': dedent("""
import sys
from foo.bar import eggs, spam
def main():
assert len(eggs.read_self()) > 0
return spam.identify()
"""),
'setup.py': create_foo_bar_setup('foo-bar-baz', extra_args='scripts=["foobaz"]')
}

def add_wheel(builder, content):
with temporary_content(content) as project:
dist = WheelInstaller(project, interpreter=builder.interpreter).bdist()
builder.add_dist_location(dist)

def add_sources(builder, content):
with temporary_content(content) as project:
for path in content.keys():
builder.add_source(os.path.join(project, path), path)

@contextmanager
def pex_builder(interpreter=None, **kwargs):
with vendor.adjusted_sys_path():
yield PEXBuilder(interpreter=vendor.setup_interpreter(interpreter), **kwargs)

with temporary_dir() as root:
pex_info1 = PexInfo.default()
pex_info1.zip_safe = False
pex1 = os.path.join(root, 'pex1.pex')
with pex_builder(pex_info=pex_info1) as builder1:
add_wheel(builder1, content1)
add_sources(builder1, content2)
builder1.build(pex1)

pex_info2 = PexInfo.default()
pex_info2.pex_path = pex1
pex2 = os.path.join(root, 'pex2')
with pex_builder(path=pex2, pex_info=pex_info2) as builder2:
add_wheel(builder2, content3)
builder2.set_script('foobaz')
builder2.freeze()

assert 42 == PEX(pex2).run(env=dict(PEX_VERBOSE='9'))


def normalize(path):
return os.path.normpath(os.path.realpath(path)).lower()

Expand Down
11 changes: 11 additions & 0 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,3 +1207,14 @@ def test_setup_python_multiple_direct_markers():

with environment_as(PATH=os.path.dirname(py27_interpreter)):
subprocess.check_call(py2_only_program)


def test_force_local_implicit_ns_packages_issues_598():
# This was a minimal repro for the issue documented in #598.
with temporary_dir() as out:
tcl_pex = os.path.join(out, 'tcl.pex')
run_pex_command(['twitter.common.lang==0.3.9', '-o', tcl_pex])

with environment_as(PEX_FORCE_LOCAL='1', PEX_PATH=tcl_pex):
result = subprocess.call([tcl_pex, '-c', 'from twitter.common.lang import Singleton'])
assert 0 == result

0 comments on commit beb77bd

Please sign in to comment.