From 7574cf47373687e7ab5161d3d3697302b4822e57 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Mon, 29 Oct 2018 16:53:30 -0600 Subject: [PATCH] Fix pex force local to handle PEP-420. Previously, pex would blow up trying to adjust import paths if a PEP-420 implicit namespace package was encountered. Add a test of the path adjustment functionality both to confirm it was needed (it was) and that the fix technique works with all forms of namespace packages by only assuming they support `append`. Fixes #598 --- pex/bootstrap.py | 67 ++++++++++++++++++++++++++++ pex/environment.py | 58 ++++++++++++++----------- pex/pex.py | 23 ++++------ tests/test_environment.py | 91 ++++++++++++++++++++++++++++++++++++++- tests/test_integration.py | 10 +++++ 5 files changed, 208 insertions(+), 41 deletions(-) create mode 100644 pex/bootstrap.py diff --git a/pex/bootstrap.py b/pex/bootstrap.py new file mode 100644 index 000000000..f113becba --- /dev/null +++ b/pex/bootstrap.py @@ -0,0 +1,67 @@ +# coding=utf-8 +# Copyright 2018 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import os +import pkgutil + + +class Bootstrap(object): + """Supports introspection of the PEX bootstrap code.""" + + @staticmethod + def _module_path(module_name): + return module_name.split('.') + + _INSTANCE = None + + @classmethod + def locate(cls): + """Locates the active PEX bootstrap. + + :rtype: :class:`Bootstrap` + """ + if cls._INSTANCE is None: + bootstrap_path = __file__ + module_import_path = cls._module_path(__name__) + + # For example, our __file__ might be requests.pex/.bootstrap/_pex/bootstrap.pyc and our import + # path _pex.bootstrap; so we walk back through all the module components of our import path to + # find the base sys.path entry where we were found (requests.pex/.bootstrap in this example). + for _ in module_import_path: + bootstrap_path = os.path.dirname(bootstrap_path) + + cls._INSTANCE = cls(path_entry=bootstrap_path) + return cls._INSTANCE + + def __init__(self, path_entry): + self._path_entry = path_entry + self._root_module_names = None + + @property + def path_entry(self): + """Return the ``sys.path`` entry that contains the PEX bootstrap code. + + :rtype: str + """ + return self._path_entry + + @property + def root_module_names(self): + """Return the set of root modules found in the PEX bootstrap. + + :rtype: :class:`collections.Set` of str + """ + if self._root_module_names is None: + def iter_module_names(): + for _, module_name, _ in pkgutil.iter_modules([self.path_entry]): + yield module_name + self._root_module_names = frozenset(iter_module_names()) + return self._root_module_names + + def is_bootstrap_module(self, module_name): + """Return ``True`` if the given ``module_name`` points into bootstrap code. + + :rtype: bool + """ + return self._module_path(module_name)[0] in self.root_module_names diff --git a/pex/environment.py b/pex/environment.py index 3dd6e427a..ba7ecb734 100644 --- a/pex/environment.py +++ b/pex/environment.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import importlib import itertools import os import site @@ -10,6 +11,7 @@ import uuid from pex import pex_builder +from pex.bootstrap import Bootstrap from pex.common import die, open_zip, rename_if_empty, safe_mkdir, safe_rmtree from pex.interpreter import PythonInterpreter from pex.package import distribution_compatible @@ -28,18 +30,18 @@ class PEXEnvironment(Environment): @classmethod - def force_local(cls, pex, pex_info): + def force_local(cls, pex_file, pex_info): if pex_info.code_hash is None: # Do not support force_local if code_hash is not set. (It should always be set.) - return pex + return pex_file explode_dir = os.path.join(pex_info.zip_unsafe_cache, pex_info.code_hash) TRACER.log('PEX is not zip safe, exploding to %s' % explode_dir) if not os.path.exists(explode_dir): explode_tmp = explode_dir + '.' + uuid.uuid4().hex - with TRACER.timed('Unzipping %s' % pex): + with TRACER.timed('Unzipping %s' % pex_file): try: safe_mkdir(explode_tmp) - with open_zip(pex) as pex_zip: + with open_zip(pex_file) as pex_zip: pex_files = (x for x in pex_zip.namelist() if not x.startswith(pex_builder.BOOTSTRAP_DIR) and not x.startswith(PexInfo.INTERNAL_CACHE)) @@ -52,26 +54,31 @@ def force_local(cls, pex, pex_info): return explode_dir @classmethod - def update_module_paths(cls, new_code_path): - # Force subsequent imports to come from the .pex directory rather than the .pex file. - TRACER.log('Adding to the head of sys.path: %s' % new_code_path) - sys.path.insert(0, new_code_path) - for name, module in sys.modules.items(): - if hasattr(module, '__path__'): - module_dir = os.path.join(new_code_path, *name.split(".")) - TRACER.log('Adding to the head of %s.__path__: %s' % (module.__name__, module_dir)) - try: - module.__path__.insert(0, module_dir) - except AttributeError: - # TODO: This is a temporary bandaid for an unhandled AttributeError which results - # in a startup crash. See https://github.com/pantsbuild/pex/issues/598 for more info. - TRACER.log( - 'Failed to insert %s: %s.__path__ of type %s does not support insertion!' % ( - module_dir, - module.__name__, - type(module.__path__) - ) - ) + def update_module_paths(cls, pex_file, explode_dir): + # Force subsequent imports to come from the exploded .pex directory rather than the .pex file. + TRACER.log('Adding to the head of sys.path: %s' % explode_dir) + sys.path.insert(0, explode_dir) + + bootstrap = Bootstrap.locate() + + def reimport(module_name): + if bootstrap.is_bootstrap_module(module_name): + TRACER.log('Not reimporting bootstrap module %s' % module_name, V=2) + return + + existing_module = sys.modules.pop(module_name) + TRACER.log('Reimporting %s loaded via %r from exploded pex' + % (module_name, existing_module.__path__)) + reimported_module = importlib.import_module(module_name) + for item in existing_module.__path__: + reimported_module.__path__.append(item) + + # And re-import any packages already loaded from within the .pex file. + pex_path = os.path.realpath(pex_file) + for name, module in list(sys.modules.items()): + if hasattr(module, '__path__') and any(os.path.realpath(path_item).startswith(pex_path) + for path_item in module.__path__): + reimport(name) @classmethod def write_zipped_internal_cache(cls, pex, pex_info): @@ -213,7 +220,8 @@ def _activate(self): self.update_candidate_distributions(self.load_internal_cache(self._pex, self._pex_info)) if not self._pex_info.zip_safe and os.path.isfile(self._pex): - self.update_module_paths(self.force_local(self._pex, self._pex_info)) + explode_dir = self.force_local(pex_file=self._pex, pex_info=self._pex_info) + self.update_module_paths(pex_file=self._pex, explode_dir=explode_dir) all_reqs = [Requirement.parse(req) for req in self._pex_info.requirements] diff --git a/pex/pex.py b/pex/pex.py index b968c4fd7..72d875f39 100644 --- a/pex/pex.py +++ b/pex/pex.py @@ -4,13 +4,13 @@ from __future__ import absolute_import, print_function import os -import pkgutil import sys from distutils import sysconfig from site import USER_SITE import pex.third_party.pkg_resources as pkg_resources from pex import third_party +from pex.bootstrap import Bootstrap from pex.common import die from pex.environment import PEXEnvironment from pex.executor import Executor @@ -163,7 +163,7 @@ def minimum_sys_modules(cls, site_libs, modules=None): metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace packages to leak into imports despite being scrubbed from sys.path. - NOTE: This method mutates modules' __path__ attributes in sys.module, so this is currently an + NOTE: This method mutates modules' __path__ attributes in sys.modules, so this is currently an irreversible operation. """ @@ -176,7 +176,7 @@ def minimum_sys_modules(cls, site_libs, modules=None): new_modules[module_name] = module continue - # Unexpected objects, e.g. namespace packages, should just be dropped: + # Unexpected objects, e.g. PEP 420 namespace packages, should just be dropped. if not isinstance(module.__path__, list): TRACER.log('Dropping %s' % (module_name,), V=3) continue @@ -403,15 +403,8 @@ def _execute(self): def demote_bootstrap(cls): TRACER.log('Bootstrap complete, performing final sys.path modifications...') - bootstrap_path = __file__ - module_import_path = __name__.split('.') - - # For example, our __file__ might be requests.pex/.bootstrap/pex/pex.pyc and our import path - # pex.pex; so we walk back through all the module components of our import path to find the - # base sys.path entry where we were found (requests.pex/.bootstrap in this example). - for _ in module_import_path: - bootstrap_path = os.path.dirname(bootstrap_path) - bootstrap_path_index = sys.path.index(bootstrap_path) + bootstrap = Bootstrap.locate() + bootstrap_path_index = sys.path.index(bootstrap.path_entry) should_log = {level: TRACER.should_log(V=level) for level in range(1, 10)} @@ -424,9 +417,9 @@ def log(msg, V=1): # execution of user code. unregister_finders() third_party.uninstall() - for _, mod, _ in pkgutil.iter_modules([bootstrap_path]): + for mod in bootstrap.root_module_names: if mod in sys.modules: - log('Un-importing bootstrap dependency %s from %s' % (mod, bootstrap_path), V=2) + log('Un-importing bootstrap dependency %s from %s' % (mod, bootstrap.path_entry), V=2) sys.modules.pop(mod) log('un-imported {}'.format(mod), V=9) @@ -436,7 +429,7 @@ def log(msg, V=1): log('un-imported {}'.format(submod), V=9) sys.path.pop(bootstrap_path_index) - sys.path.append(bootstrap_path) + sys.path.append(bootstrap.path_entry) import pex log('Re-imported pex from {}'.format(pex.__path__), V=3) diff --git a/tests/test_environment.py b/tests/test_environment.py index c6a162fe4..9b02695d6 100644 --- a/tests/test_environment.py +++ b/tests/test_environment.py @@ -4,6 +4,7 @@ import platform import subprocess from contextlib import contextmanager +from textwrap import dedent import pytest @@ -16,7 +17,7 @@ from pex.pex import PEX from pex.pex_builder import PEXBuilder from pex.pex_info import PexInfo -from pex.testing import make_bdist, temporary_dir, temporary_filename +from pex.testing import make_bdist, temporary_content, temporary_dir, temporary_filename @contextmanager @@ -49,6 +50,94 @@ def test_force_local(): assert PEXEnvironment.force_local(pex_file, pb.info) == code_cache +def test_force_local_implicit_ns_packages_issues_598(): + def create_foo_bar_setup(name, extra_args=None): + return dedent(""" + from setuptools import setup + + setup( + name=%r, + version='0.0.1', + packages=['foo', 'foo.bar'], + namespace_packages=['foo', 'foo.bar'], + %s + ) + """ % (name, extra_args or '')) + + def with_foo_bar_ns_packages(content): + ns_packages = { + os.path.join(pkg, '__init__.py'): '__import__("pkg_resources").declare_namespace(__name__)' + for pkg in ('foo', 'foo/bar') + } + ns_packages.update(content) + return ns_packages + + content1 = with_foo_bar_ns_packages({ + 'foo/bar/spam.py': 'identify = lambda: 42', + 'setup.py': create_foo_bar_setup('foo-bar-spam') + }) + + content2 = with_foo_bar_ns_packages({ + 'foo/bar/eggs.py': dedent(""" + # NB: This only works when this content is unpacked loose on the filesystem! + def read_self(): + with open(__file__) as fp: + return fp.read() + """) + }) + + content3 = with_foo_bar_ns_packages({ + 'foobaz': dedent("""\ + #!python + import sys + + from foo.bar import baz + + sys.exit(baz.main()) + """), + 'foo/bar/baz.py': dedent(""" + import sys + + from foo.bar import eggs, spam + + def main(): + assert len(eggs.read_self()) > 0 + return spam.identify() + """), + 'setup.py': create_foo_bar_setup('foo-bar-baz', extra_args='scripts=["foobaz"]') + }) + + def add_wheel(builder, content): + with temporary_content(content) as project: + dist = WheelInstaller(project, interpreter=builder.interpreter).bdist() + builder.add_dist_location(dist) + + def add_sources(builder, content): + with temporary_content(content) as project: + for path in content.keys(): + builder.add_source(os.path.join(project, path), path) + + interpreter = vendor.setup_interpreter() + with temporary_dir() as root: + pex_info1 = PexInfo.default() + pex_info1.zip_safe = False + pex1 = os.path.join(root, 'pex1.pex') + builder1 = PEXBuilder(interpreter=interpreter, pex_info=pex_info1) + add_wheel(builder1, content1) + add_sources(builder1, content2) + builder1.build(pex1) + + pex_info2 = PexInfo.default() + pex_info2.pex_path = pex1 + pex2 = os.path.join(root, 'pex2') + builder2 = PEXBuilder(path=pex2, interpreter=interpreter, pex_info=pex_info2) + add_wheel(builder2, content3) + builder2.set_script('foobaz') + builder2.freeze() + + assert 42 == PEX(pex2).run(env=dict(PEX_VERBOSE='9')) + + def normalize(path): return os.path.normpath(os.path.realpath(path)).lower() diff --git a/tests/test_integration.py b/tests/test_integration.py index c032bc541..58de2d8a4 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1196,3 +1196,13 @@ def test_setup_python_multiple_direct_markers(): subprocess.check_call(py2_only_program, env=make_env(PATH=os.path.dirname(py36_interpreter))) subprocess.check_call(py2_only_program, env=make_env(PATH=os.path.dirname(py27_interpreter))) + + +def test_force_local_implicit_ns_packages_issues_598(): + # This was a minimal repro for the issue documented in #598. + with temporary_dir() as out: + tcl_pex = os.path.join(out, 'tcl.pex') + run_pex_command(['twitter.common.lang==0.3.9', '-o', tcl_pex]) + + subprocess.check_call([tcl_pex, '-c', 'from twitter.common.lang import Singleton'], + env=make_env(PEX_FORCE_LOCAL='1', PEX_PATH=tcl_pex))