Skip to content

Commit

Permalink
Bytecode compile using the correct interpreter.
Browse files Browse the repository at this point in the history
Previously a PEXBuilder with a custom interpeter would not use it to
bytecode compile sources when building a pex.  This could lead to errors
when the current interpreter and the target interpreter spanned breaking
language changes.

Kill the CodeMarshaller which has only its marshalling half used in
favor of using py_compile from the stdlib in the new
interpreter-sensitive Compiler class.

Additionally, actually compile all PEXBuilder source including any
generated __init__.pys, the generated __main__.py and everything in the
_pex/ bootstrap.

Testing Done:
CI went green here:
  https://travis-ci.org/pantsbuild/pex/builds/69010594

Bugs closed: 14, 126, 127

Reviewed at https://rbcommons.com/s/twitter/r/2350/
  • Loading branch information
jsirois committed Jun 30, 2015
1 parent 03ef43d commit ac3c7f1
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 109 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ CHANGES
----------

* Allow PEXBuilder to optionally copy files into the PEX environment instead of hard-linking them.

* Allow PEXBuilder to optionally skip precompilation of .py files into .pyc files.

* Bug fix: PEXBuilder did not respect the target interpreter when compiling source to bytecode.
Fixes `#127 <https://github.com/pantsbuild/pex/issues/127>`_.

-----
1.0.0
-----
Expand Down
3 changes: 2 additions & 1 deletion pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ def touch(self, dst, label=None):
Has similar exceptional cases as Chroot.copy
"""
dst = self._normalize(dst)
self.write('', dst, label, mode='a')
self._tag(dst, label)
touch(os.path.join(self.chroot, dst))

def get(self, label):
"""Get all files labeled with ``label``"""
Expand Down
90 changes: 90 additions & 0 deletions pex/compiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import subprocess
import tempfile

from .compatibility import to_bytes


_COMPILER_MAIN = """
from __future__ import print_function
import os
import py_compile
import sys
def compile(root, relpaths):
compiled = []
errored = {}
for relpath in relpaths:
abspath = os.path.join(root, relpath)
# NB: We give the compiled bytecode file a `.pyc` extension, but if PYTHONOPTIMIZE is in play
# the generated bytecode will be optimized. Traditionally these optimized bytecode files would
# have a `.pyo` extension, but the extension only matters for location of the file to execute
# for a given module and not on the interpretation of its bytecode contents. As such we're
# safe to pick the `.pyc` extension for all bytecode file cases without a need to interpret the
# current optimization setting for the active python interpreter.
pyc_relpath = relpath + 'c'
pyc_abspath = os.path.join(root, pyc_relpath)
try:
py_compile.compile(abspath, cfile=pyc_abspath, dfile=relpath, doraise=True)
compiled.append(pyc_relpath)
except py_compile.PyCompileError as e:
errored[e.file] = e.msg
return compiled, errored
def main(root, relpaths):
compiled, errored = compile(root, relpaths)
if not errored:
for path in compiled:
print(path)
sys.exit(0)
print('Encountered %%d errors compiling %%d files:' %% (len(errored), len(relpaths)),
file=sys.stderr)
for file, msg in errored.items():
print(' %%s: %%s' %% (file, msg), file=sys.stderr)
sys.exit(1)
root = %(root)r
relpaths = %(relpaths)r
main(root, relpaths)
"""


class Compiler(object):
class Error(Exception):
"""Indicates an error compiling one or more python source files."""

def __init__(self, interpreter):
"""Creates a bytecode compiler for the given `interpreter`.
:param interpreter: The interpreter to use to compile sources with.
:type interpreter: :class:`pex.interpreter.PythonInterpreter`
"""
self._interpreter = interpreter

def compile(self, root, relpaths):
"""Compiles the given python source files using this compiler's interpreter.
:param string root: The root path all the source files are found under.
:param list relpaths: The realtive paths from the `root` of the source files to compile.
:returns: A list of relative paths of the compiled bytecode files.
:raises: A :class:`Compiler.Error` if there was a problem bytecode compiling any of the files.
"""
with tempfile.NamedTemporaryFile() as fp:
fp.write(to_bytes(_COMPILER_MAIN % {'root': root, 'relpaths': relpaths}, encoding='utf-8'))
fp.flush()
process = subprocess.Popen([self._interpreter.binary, fp.name],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = process.communicate()
if process.returncode != 0:
raise self.Error(err)
return [pyc_relpath.decode('utf-8') for pyc_relpath in out.splitlines()]
86 changes: 0 additions & 86 deletions pex/marshaller.py

This file was deleted.

42 changes: 26 additions & 16 deletions pex/pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

from .common import Chroot, chmod_plus_x, open_zip, safe_mkdir, safe_mkdtemp
from .compatibility import to_bytes
from .compiler import Compiler
from .finders import get_entry_point_from_console_script, get_script_from_distributions
from .interpreter import PythonInterpreter
from .marshaller import CodeMarshaller
from .pex_info import PexInfo
from .util import CacheHelper, DistributionHelper

Expand Down Expand Up @@ -127,21 +127,15 @@ def info(self, value):
self._ensure_unfrozen('Changing PexInfo')
self._pex_info = value

def add_source(self, filename, env_filename, precompile_python=True):
def add_source(self, filename, env_filename):
"""Add a source to the PEX environment.
:param filename: The source filename to add to the PEX.
:param env_filename: The destination filename in the PEX.
:param compile_python: If True, precompile .py files into .pyc files.
:param env_filename: The destination filename in the PEX. This path
must be a relative path.
"""
self._ensure_unfrozen('Adding source')
self._copy_or_link(filename, env_filename, 'source')
if precompile_python and filename.endswith('.py'):
env_filename_pyc = os.path.splitext(env_filename)[0] + '.pyc'
with open(filename) as fp:
pyc_object = CodeMarshaller.from_py(fp.read(), env_filename)
self._chroot.write(pyc_object.to_pyc(), env_filename_pyc, 'source')
self._copy_or_link(filename, env_filename, "source")

def add_resource(self, filename, env_filename):
"""Add a resource to the PEX environment.
Expand Down Expand Up @@ -323,6 +317,15 @@ def _prepare_inits(self):
self._chroot.write(bytes(import_string, 'UTF-8'), sub_path)
init_digest.add(sub_path)

def _precompile_source(self):
source_relpaths = [path for label in ('source', 'executable', 'main', 'bootstrap')
for path in self._chroot.filesets.get(label, ()) if path.endswith('.py')]

compiler = Compiler(self.interpreter)
compiled_relpaths = compiler.compile(self._chroot.path(), source_relpaths)
for compiled in compiled_relpaths:
self._chroot.touch(compiled, label='bytecode')

def _prepare_manifest(self):
self._chroot.write(self._pex_info.dump().encode('utf-8'), PexInfo.PATH, label='manifest')

Expand Down Expand Up @@ -356,8 +359,10 @@ def _prepare_bootstrap(self):

for fn, content_stream in DistributionHelper.walk_data(setuptools):
if fn.startswith('pkg_resources') or fn.startswith('_markerlib'):
self._chroot.write(content_stream.read(), os.path.join(self.BOOTSTRAP_DIR, fn), 'resource')
wrote_setuptools = True
if not fn.endswith('.pyc'): # We'll compile our own .pyc's later.
dst = os.path.join(self.BOOTSTRAP_DIR, fn)
self._chroot.write(content_stream.read(), dst, 'bootstrap')
wrote_setuptools = True

if not wrote_setuptools:
raise RuntimeError(
Expand All @@ -376,11 +381,13 @@ def _prepare_bootstrap(self):
for fn in provider.resource_listdir(''):
if fn.endswith('.py'):
self._chroot.write(provider.get_resource_string(source_name, fn),
os.path.join(self.BOOTSTRAP_DIR, target_location, fn), 'resource')
os.path.join(self.BOOTSTRAP_DIR, target_location, fn), 'bootstrap')

def freeze(self):
def freeze(self, bytecode_compile=True):
"""Freeze the PEX.
:param bytecode_compile: If True, precompile .py files into .pyc files when freezing code.
Freezing the PEX writes all the necessary metadata and environment bootstrapping code. It may
only be called once and renders the PEXBuilder immutable.
"""
Expand All @@ -390,18 +397,21 @@ def freeze(self):
self._prepare_manifest()
self._prepare_bootstrap()
self._prepare_main()
if bytecode_compile:
self._precompile_source()
self._frozen = True

def build(self, filename):
def build(self, filename, bytecode_compile=True):
"""Package the PEX into a zipfile.
:param filename: The filename where the PEX should be stored.
:param bytecode_compile: If True, precompile .py files into .pyc files.
If the PEXBuilder is not yet frozen, it will be frozen by ``build``. This renders the
PEXBuilder immutable.
"""
if not self._frozen:
self.freeze()
self.freeze(bytecode_compile=bytecode_compile)
try:
os.unlink(filename + '~')
self._logger.warn('Previous binary unexpectedly exists, cleaning: %s' % (filename + '~'))
Expand Down
72 changes: 72 additions & 0 deletions tests/test_compiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import contextlib
import os

import marshal
import pytest
from twitter.common.contextutil import temporary_dir

from pex import compatibility
from pex.common import safe_open
from pex.compatibility import to_bytes
from pex.compiler import Compiler
from pex.interpreter import PythonInterpreter


def write_source(path, valid=True):
with safe_open(path, 'wb') as fp:
fp.write(to_bytes('basename = %r\n' % os.path.basename(path)))
if not valid:
fp.write(to_bytes('invalid!\n'))


@contextlib.contextmanager
def compilation(valid_paths=None, invalid_paths=None, compile_paths=None):
with temporary_dir() as root:
for path in valid_paths:
write_source(os.path.join(root, path))
for path in invalid_paths:
write_source(os.path.join(root, path), valid=False)
compiler = Compiler(PythonInterpreter.get())
yield root, compiler.compile(root, compile_paths)


def test_compile_success():
with compilation(valid_paths=['a.py', 'c/c.py'],
invalid_paths=['b.py', 'd/d.py'],
compile_paths=['a.py', 'c/c.py']) as (root, compiled_relpaths):

assert 2 == len(compiled_relpaths)

results = {}
for compiled in compiled_relpaths:
compiled_abspath = os.path.join(root, compiled)
with open(compiled_abspath, 'rb') as fp:
fp.read(4) # Skip the magic header.
fp.read(4) # Skip the timestamp.
if compatibility.PY3:
fp.read(4) # Skip the size.
code = marshal.load(fp)
local_symbols = {}
exec(code, {}, local_symbols)
results[compiled] = local_symbols

assert {'basename': 'a.py'} == results.pop('a.pyc')
assert {'basename': 'c.py'} == results.pop('c/c.pyc')
assert 0 == len(results)


def test_compile_failure():
with pytest.raises(Compiler.Error) as e:
with compilation(valid_paths=['a.py', 'c/c.py'],
invalid_paths=['b.py', 'd/d.py'],
compile_paths=['a.py', 'b.py', 'c/c.py', 'd/d.py']):
raise AssertionError('Should not reach here.')

message = str(e.value)
assert 'a.py' not in message
assert 'b.py' in message
assert 'c/c.py' not in message
assert 'd/d.py' in message
2 changes: 1 addition & 1 deletion tests/test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_force_local():
assert os.path.exists(pb.info.zip_unsafe_cache)
assert len(os.listdir(pb.info.zip_unsafe_cache)) == 1
assert [os.path.basename(code_cache)] == os.listdir(pb.info.zip_unsafe_cache)
assert set(os.listdir(code_cache)) == set([PexInfo.PATH, '__main__.py'])
assert set(os.listdir(code_cache)) == set([PexInfo.PATH, '__main__.py', '__main__.pyc'])

# idempotence
assert PEXEnvironment.force_local(pex_file.name, pb.info) == code_cache
Expand Down
Loading

0 comments on commit ac3c7f1

Please sign in to comment.