Skip to content

Commit

Permalink
Fix pex force local to handle PEP-420.
Browse files Browse the repository at this point in the history
Previously, pex would blow up trying to adjust import paths if a PEP-420
implicit namespace package was encountered. Add a test of the path
adjustment functionality both to confirm it was needed (it was) and that
the fix technique works with all forms of namespace packages by only
assuming they support `append`.

Fixes pex-tool#598
  • Loading branch information
jsirois committed Nov 19, 2018
1 parent 9f2ba07 commit 7574cf4
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 41 deletions.
67 changes: 67 additions & 0 deletions pex/bootstrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# coding=utf-8
# Copyright 2018 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os
import pkgutil


class Bootstrap(object):
"""Supports introspection of the PEX bootstrap code."""

@staticmethod
def _module_path(module_name):
return module_name.split('.')

_INSTANCE = None

@classmethod
def locate(cls):
"""Locates the active PEX bootstrap.
:rtype: :class:`Bootstrap`
"""
if cls._INSTANCE is None:
bootstrap_path = __file__
module_import_path = cls._module_path(__name__)

# For example, our __file__ might be requests.pex/.bootstrap/_pex/bootstrap.pyc and our import
# path _pex.bootstrap; so we walk back through all the module components of our import path to
# find the base sys.path entry where we were found (requests.pex/.bootstrap in this example).
for _ in module_import_path:
bootstrap_path = os.path.dirname(bootstrap_path)

cls._INSTANCE = cls(path_entry=bootstrap_path)
return cls._INSTANCE

def __init__(self, path_entry):
self._path_entry = path_entry
self._root_module_names = None

@property
def path_entry(self):
"""Return the ``sys.path`` entry that contains the PEX bootstrap code.
:rtype: str
"""
return self._path_entry

@property
def root_module_names(self):
"""Return the set of root modules found in the PEX bootstrap.
:rtype: :class:`collections.Set` of str
"""
if self._root_module_names is None:
def iter_module_names():
for _, module_name, _ in pkgutil.iter_modules([self.path_entry]):
yield module_name
self._root_module_names = frozenset(iter_module_names())
return self._root_module_names

def is_bootstrap_module(self, module_name):
"""Return ``True`` if the given ``module_name`` points into bootstrap code.
:rtype: bool
"""
return self._module_path(module_name)[0] in self.root_module_names
58 changes: 33 additions & 25 deletions pex/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

from __future__ import absolute_import

import importlib
import itertools
import os
import site
import sys
import uuid

from pex import pex_builder
from pex.bootstrap import Bootstrap
from pex.common import die, open_zip, rename_if_empty, safe_mkdir, safe_rmtree
from pex.interpreter import PythonInterpreter
from pex.package import distribution_compatible
Expand All @@ -28,18 +30,18 @@

class PEXEnvironment(Environment):
@classmethod
def force_local(cls, pex, pex_info):
def force_local(cls, pex_file, pex_info):
if pex_info.code_hash is None:
# Do not support force_local if code_hash is not set. (It should always be set.)
return pex
return pex_file
explode_dir = os.path.join(pex_info.zip_unsafe_cache, pex_info.code_hash)
TRACER.log('PEX is not zip safe, exploding to %s' % explode_dir)
if not os.path.exists(explode_dir):
explode_tmp = explode_dir + '.' + uuid.uuid4().hex
with TRACER.timed('Unzipping %s' % pex):
with TRACER.timed('Unzipping %s' % pex_file):
try:
safe_mkdir(explode_tmp)
with open_zip(pex) as pex_zip:
with open_zip(pex_file) as pex_zip:
pex_files = (x for x in pex_zip.namelist()
if not x.startswith(pex_builder.BOOTSTRAP_DIR) and
not x.startswith(PexInfo.INTERNAL_CACHE))
Expand All @@ -52,26 +54,31 @@ def force_local(cls, pex, pex_info):
return explode_dir

@classmethod
def update_module_paths(cls, new_code_path):
# Force subsequent imports to come from the .pex directory rather than the .pex file.
TRACER.log('Adding to the head of sys.path: %s' % new_code_path)
sys.path.insert(0, new_code_path)
for name, module in sys.modules.items():
if hasattr(module, '__path__'):
module_dir = os.path.join(new_code_path, *name.split("."))
TRACER.log('Adding to the head of %s.__path__: %s' % (module.__name__, module_dir))
try:
module.__path__.insert(0, module_dir)
except AttributeError:
# TODO: This is a temporary bandaid for an unhandled AttributeError which results
# in a startup crash. See https://github.com/pantsbuild/pex/issues/598 for more info.
TRACER.log(
'Failed to insert %s: %s.__path__ of type %s does not support insertion!' % (
module_dir,
module.__name__,
type(module.__path__)
)
)
def update_module_paths(cls, pex_file, explode_dir):
# Force subsequent imports to come from the exploded .pex directory rather than the .pex file.
TRACER.log('Adding to the head of sys.path: %s' % explode_dir)
sys.path.insert(0, explode_dir)

bootstrap = Bootstrap.locate()

def reimport(module_name):
if bootstrap.is_bootstrap_module(module_name):
TRACER.log('Not reimporting bootstrap module %s' % module_name, V=2)
return

existing_module = sys.modules.pop(module_name)
TRACER.log('Reimporting %s loaded via %r from exploded pex'
% (module_name, existing_module.__path__))
reimported_module = importlib.import_module(module_name)
for item in existing_module.__path__:
reimported_module.__path__.append(item)

# And re-import any packages already loaded from within the .pex file.
pex_path = os.path.realpath(pex_file)
for name, module in list(sys.modules.items()):
if hasattr(module, '__path__') and any(os.path.realpath(path_item).startswith(pex_path)
for path_item in module.__path__):
reimport(name)

@classmethod
def write_zipped_internal_cache(cls, pex, pex_info):
Expand Down Expand Up @@ -213,7 +220,8 @@ def _activate(self):
self.update_candidate_distributions(self.load_internal_cache(self._pex, self._pex_info))

if not self._pex_info.zip_safe and os.path.isfile(self._pex):
self.update_module_paths(self.force_local(self._pex, self._pex_info))
explode_dir = self.force_local(pex_file=self._pex, pex_info=self._pex_info)
self.update_module_paths(pex_file=self._pex, explode_dir=explode_dir)

all_reqs = [Requirement.parse(req) for req in self._pex_info.requirements]

Expand Down
23 changes: 8 additions & 15 deletions pex/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from __future__ import absolute_import, print_function

import os
import pkgutil
import sys
from distutils import sysconfig
from site import USER_SITE

import pex.third_party.pkg_resources as pkg_resources
from pex import third_party
from pex.bootstrap import Bootstrap
from pex.common import die
from pex.environment import PEXEnvironment
from pex.executor import Executor
Expand Down Expand Up @@ -163,7 +163,7 @@ def minimum_sys_modules(cls, site_libs, modules=None):
metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace
packages to leak into imports despite being scrubbed from sys.path.
NOTE: This method mutates modules' __path__ attributes in sys.module, so this is currently an
NOTE: This method mutates modules' __path__ attributes in sys.modules, so this is currently an
irreversible operation.
"""

Expand All @@ -176,7 +176,7 @@ def minimum_sys_modules(cls, site_libs, modules=None):
new_modules[module_name] = module
continue

# Unexpected objects, e.g. namespace packages, should just be dropped:
# Unexpected objects, e.g. PEP 420 namespace packages, should just be dropped.
if not isinstance(module.__path__, list):
TRACER.log('Dropping %s' % (module_name,), V=3)
continue
Expand Down Expand Up @@ -403,15 +403,8 @@ def _execute(self):
def demote_bootstrap(cls):
TRACER.log('Bootstrap complete, performing final sys.path modifications...')

bootstrap_path = __file__
module_import_path = __name__.split('.')

# For example, our __file__ might be requests.pex/.bootstrap/pex/pex.pyc and our import path
# pex.pex; so we walk back through all the module components of our import path to find the
# base sys.path entry where we were found (requests.pex/.bootstrap in this example).
for _ in module_import_path:
bootstrap_path = os.path.dirname(bootstrap_path)
bootstrap_path_index = sys.path.index(bootstrap_path)
bootstrap = Bootstrap.locate()
bootstrap_path_index = sys.path.index(bootstrap.path_entry)

should_log = {level: TRACER.should_log(V=level) for level in range(1, 10)}

Expand All @@ -424,9 +417,9 @@ def log(msg, V=1):
# execution of user code.
unregister_finders()
third_party.uninstall()
for _, mod, _ in pkgutil.iter_modules([bootstrap_path]):
for mod in bootstrap.root_module_names:
if mod in sys.modules:
log('Un-importing bootstrap dependency %s from %s' % (mod, bootstrap_path), V=2)
log('Un-importing bootstrap dependency %s from %s' % (mod, bootstrap.path_entry), V=2)
sys.modules.pop(mod)
log('un-imported {}'.format(mod), V=9)

Expand All @@ -436,7 +429,7 @@ def log(msg, V=1):
log('un-imported {}'.format(submod), V=9)

sys.path.pop(bootstrap_path_index)
sys.path.append(bootstrap_path)
sys.path.append(bootstrap.path_entry)

import pex
log('Re-imported pex from {}'.format(pex.__path__), V=3)
Expand Down
91 changes: 90 additions & 1 deletion tests/test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import platform
import subprocess
from contextlib import contextmanager
from textwrap import dedent

import pytest

Expand All @@ -16,7 +17,7 @@
from pex.pex import PEX
from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo
from pex.testing import make_bdist, temporary_dir, temporary_filename
from pex.testing import make_bdist, temporary_content, temporary_dir, temporary_filename


@contextmanager
Expand Down Expand Up @@ -49,6 +50,94 @@ def test_force_local():
assert PEXEnvironment.force_local(pex_file, pb.info) == code_cache


def test_force_local_implicit_ns_packages_issues_598():
def create_foo_bar_setup(name, extra_args=None):
return dedent("""
from setuptools import setup
setup(
name=%r,
version='0.0.1',
packages=['foo', 'foo.bar'],
namespace_packages=['foo', 'foo.bar'],
%s
)
""" % (name, extra_args or ''))

def with_foo_bar_ns_packages(content):
ns_packages = {
os.path.join(pkg, '__init__.py'): '__import__("pkg_resources").declare_namespace(__name__)'
for pkg in ('foo', 'foo/bar')
}
ns_packages.update(content)
return ns_packages

content1 = with_foo_bar_ns_packages({
'foo/bar/spam.py': 'identify = lambda: 42',
'setup.py': create_foo_bar_setup('foo-bar-spam')
})

content2 = with_foo_bar_ns_packages({
'foo/bar/eggs.py': dedent("""
# NB: This only works when this content is unpacked loose on the filesystem!
def read_self():
with open(__file__) as fp:
return fp.read()
""")
})

content3 = with_foo_bar_ns_packages({
'foobaz': dedent("""\
#!python
import sys
from foo.bar import baz
sys.exit(baz.main())
"""),
'foo/bar/baz.py': dedent("""
import sys
from foo.bar import eggs, spam
def main():
assert len(eggs.read_self()) > 0
return spam.identify()
"""),
'setup.py': create_foo_bar_setup('foo-bar-baz', extra_args='scripts=["foobaz"]')
})

def add_wheel(builder, content):
with temporary_content(content) as project:
dist = WheelInstaller(project, interpreter=builder.interpreter).bdist()
builder.add_dist_location(dist)

def add_sources(builder, content):
with temporary_content(content) as project:
for path in content.keys():
builder.add_source(os.path.join(project, path), path)

interpreter = vendor.setup_interpreter()
with temporary_dir() as root:
pex_info1 = PexInfo.default()
pex_info1.zip_safe = False
pex1 = os.path.join(root, 'pex1.pex')
builder1 = PEXBuilder(interpreter=interpreter, pex_info=pex_info1)
add_wheel(builder1, content1)
add_sources(builder1, content2)
builder1.build(pex1)

pex_info2 = PexInfo.default()
pex_info2.pex_path = pex1
pex2 = os.path.join(root, 'pex2')
builder2 = PEXBuilder(path=pex2, interpreter=interpreter, pex_info=pex_info2)
add_wheel(builder2, content3)
builder2.set_script('foobaz')
builder2.freeze()

assert 42 == PEX(pex2).run(env=dict(PEX_VERBOSE='9'))


def normalize(path):
return os.path.normpath(os.path.realpath(path)).lower()

Expand Down
10 changes: 10 additions & 0 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,3 +1196,13 @@ def test_setup_python_multiple_direct_markers():
subprocess.check_call(py2_only_program, env=make_env(PATH=os.path.dirname(py36_interpreter)))

subprocess.check_call(py2_only_program, env=make_env(PATH=os.path.dirname(py27_interpreter)))


def test_force_local_implicit_ns_packages_issues_598():
# This was a minimal repro for the issue documented in #598.
with temporary_dir() as out:
tcl_pex = os.path.join(out, 'tcl.pex')
run_pex_command(['twitter.common.lang==0.3.9', '-o', tcl_pex])

subprocess.check_call([tcl_pex, '-c', 'from twitter.common.lang import Singleton'],
env=make_env(PEX_FORCE_LOCAL='1', PEX_PATH=tcl_pex))

0 comments on commit 7574cf4

Please sign in to comment.