Normalize and edify subprocess execution. (#255)

- Add a new pex.executor.Executor class for normalized subprocess execution.
- Add new structured exception types for known failure modes of subprocess execution to provide actionable information to the end users on failure.
- Port over all known library usages of subprocess to pex.executor.Executor.
- Lightweight manual integration testing with pants master consuming pex master via local whl resolves.
pex-tool · Jul 13, 2016 · 09efe0e · 09efe0e
1 parent 40ebd65
commit 09efe0e
Show file tree

Hide file tree

Showing 8 changed files with 267 additions and 56 deletions.
diff --git a/pex/compiler.py b/pex/compiler.py
@@ -3,9 +3,8 @@
 
 from __future__ import absolute_import
 
-import subprocess
-
 from .compatibility import to_bytes
+from .executor import Executor
 from .util import named_temporary_file
 
 
@@ -59,7 +58,8 @@ def main(root, relpaths):
 
 
 class Compiler(object):
-  class Error(Exception):
+  class Error(Exception): pass
+  class CompilationFailure(Error):  # N.B. This subclasses `Error` only for backwards compatibility.
     """Indicates an error compiling one or more python source files."""
 
   def __init__(self, interpreter):
@@ -81,10 +81,12 @@ def compile(self, root, relpaths):
     with named_temporary_file() as fp:
       fp.write(to_bytes(_COMPILER_MAIN % {'root': root, 'relpaths': relpaths}, encoding='utf-8'))
       fp.flush()
-      process = subprocess.Popen([self._interpreter.binary, fp.name],
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE)
-      out, err = process.communicate()
-      if process.returncode != 0:
-        raise self.Error(err)
-      return [pyc_relpath.decode('utf-8') for pyc_relpath in out.splitlines()]
+
+      try:
+        out, _ = Executor.execute([self._interpreter.binary, fp.name])
+      except Executor.NonZeroExit as e:
+        raise self.CompilationFailure(
+          'encountered %r during bytecode compilation.\nstderr was:\n%s\n' % (e, e.stderr)
+        )
+
+      return out.splitlines()
diff --git a/pex/executor.py b/pex/executor.py
@@ -0,0 +1,91 @@
+# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+import errno
+import subprocess
+
+from .compatibility import string
+
+
+class Executor(object):
+  """Handles execution of subprocesses in a structured way."""
+
+  class ExecutionError(Exception):
+    """Indicates failure to execute."""
+
+    def __init__(self, msg, cmd):
+      super(Executor.ExecutionError, self).__init__(msg)  # noqa
+      self.executable = cmd.split()[0] if isinstance(cmd, string) else cmd[0]
+      self.cmd = cmd
+
+  class NonZeroExit(ExecutionError):
+    """Indicates a non-zero exit code."""
+
+    def __init__(self, cmd, exit_code, stdout, stderr):
+      super(Executor.NonZeroExit, self).__init__(  # noqa
+        'received exit code %s during execution of `%s`' % (exit_code, cmd),
+        cmd
+      )
+      self.exit_code = exit_code
+      self.stdout = stdout
+      self.stderr = stderr
+
+  class ExecutableNotFound(ExecutionError):
+    """Indicates the executable was not found while attempting to execute."""
+
+    def __init__(self, cmd, exc):
+      super(Executor.ExecutableNotFound, self).__init__(  # noqa
+        'caught %r while trying to execute `%s`' % (exc, cmd),
+        cmd
+      )
+      self.exc = exc
+
+  @classmethod
+  def open_process(cls, cmd, env=None, cwd=None, combined=False, **kwargs):
+    """Opens a process object via subprocess.Popen().
+
+    :param string|list cmd: A list or string representing the command to run.
+    :param dict env: An environment dict for the execution.
+    :param string cwd: The target cwd for command execution.
+    :param bool combined: Whether or not to combine stderr and stdout streams.
+    :return: A `subprocess.Popen` object.
+    :raises: `Executor.ExecutableNotFound` when the executable requested to run does not exist.
+    """
+    assert len(cmd) > 0, 'cannot execute an empty command!'
+
+    try:
+      return subprocess.Popen(
+        cmd,
+        stdin=kwargs.pop('stdin', subprocess.PIPE),
+        stdout=kwargs.pop('stdout', subprocess.PIPE),
+        stderr=kwargs.pop('stderr', subprocess.STDOUT if combined else subprocess.PIPE),
+        cwd=cwd,
+        env=env,
+        **kwargs
+      )
+    except (IOError, OSError) as e:
+      if e.errno == errno.ENOENT:
+        raise cls.ExecutableNotFound(cmd, e)
+
+  @classmethod
+  def execute(cls, cmd, env=None, cwd=None, stdin_payload=None, **kwargs):
+    """Execute a command via subprocess.Popen and returns the stdio.
+
+    :param string|list cmd: A list or string representing the command to run.
+    :param dict env: An environment dict for the execution.
+    :param string cwd: The target cwd for command execution.
+    :param string stdin_payload: A string representing the stdin payload, if any, to send.
+    :return: A tuple of strings representing (stdout, stderr), pre-decoded for utf-8.
+    :raises: `Executor.ExecutableNotFound` when the executable requested to run does not exist.
+             `Executor.NonZeroExit` when the execution fails with a non-zero exit code.
+    """
+    process = cls.open_process(cmd=cmd, env=env, cwd=cwd, **kwargs)
+    stdout_raw, stderr_raw = process.communicate(input=stdin_payload)
+    # N.B. In cases where `stdout` or `stderr` is passed as parameters, these can be None.
+    stdout = stdout_raw.decode('utf-8') if stdout_raw is not None else stdout_raw
+    stderr = stderr_raw.decode('utf-8') if stderr_raw is not None else stderr_raw
+
+    if process.returncode != 0:
+      raise cls.NonZeroExit(cmd, process.returncode, stdout, stderr)
+
+    return stdout, stderr
diff --git a/pex/installer.py b/pex/installer.py
@@ -4,14 +4,14 @@
 from __future__ import absolute_import, print_function
 
 import os
-import subprocess
 import sys
 import tempfile
 
 from pkg_resources import Distribution, PathMetadata
 
 from .common import safe_mkdtemp, safe_rmtree
 from .compatibility import WINDOWS
+from .executor import Executor
 from .interpreter import PythonInterpreter
 from .tracer import TRACER
 from .version import SETUPTOOLS_REQUIREMENT, WHEEL_REQUIREMENT
@@ -100,22 +100,19 @@ def run(self):
       return self._installed
 
     with TRACER.timed('Installing %s' % self._install_tmp, V=2):
-      command = [self._interpreter.binary, '-']
-      command.extend(self._setup_command())
-      po = subprocess.Popen(command,
-          stdin=subprocess.PIPE,
-          stdout=subprocess.PIPE,
-          stderr=subprocess.PIPE,
-          env=self._interpreter.sanitized_environment(),
-          cwd=self._source_dir)
-      so, se = po.communicate(self.bootstrap_script.encode('ascii'))
-      self._installed = po.returncode == 0
-
-    if not self._installed:
-      name = os.path.basename(self._source_dir)
-      print('**** Failed to install %s. stdout:\n%s' % (name, so.decode('utf-8')), file=sys.stderr)
-      print('**** Failed to install %s. stderr:\n%s' % (name, se.decode('utf-8')), file=sys.stderr)
-      return self._installed
+      command = [self._interpreter.binary, '-'] + self._setup_command()
+      try:
+        Executor.execute(command,
+                         env=self._interpreter.sanitized_environment(),
+                         cwd=self._source_dir,
+                         stdin_payload=self.bootstrap_script.encode('ascii'))
+        self._installed = True
+      except Executor.NonZeroExit as e:
+        self._installed = False
+        name = os.path.basename(self._source_dir)
+        print('**** Failed to install %s (caused by: %r\n):' % (name, e), file=sys.stderr)
+        print('stdout:\n%s\nstderr:\n%s\n' % (e.stdout, e.stderr), file=sys.stderr)
+        return self._installed
 
     self._postprocess()
     return self._installed

diff --git a/pex/interpreter.py b/pex/interpreter.py
@@ -7,14 +7,14 @@
 
 import os
 import re
-import subprocess
 import sys
 from collections import defaultdict
 
 from pkg_resources import Distribution, Requirement, find_distributions
 
 from .base import maybe_requirement
 from .compatibility import string
+from .executor import Executor
 from .tracer import TRACER
 
 try:
@@ -235,20 +235,12 @@ def iter_extras():
   def _from_binary_external(cls, binary, path_extras):
     environ = cls.sanitized_environment()
     environ['PYTHONPATH'] = ':'.join(path_extras)
-    po = subprocess.Popen(
-        [binary],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        env=environ)
-    so, _ = po.communicate(ID_PY)
-    output = so.decode('utf8').splitlines()
+    stdout, _ = Executor.execute([binary], env=environ, stdin_payload=ID_PY)
+    output = stdout.splitlines()
     if len(output) == 0:
       raise cls.IdentificationError('Could not establish identity of %s' % binary)
     identity, extras = output[0], output[1:]
-    return cls(
-        binary,
-        PythonIdentity.from_id_string(identity),
-        extras=cls._parse_extras(extras))
+    return cls(binary, PythonIdentity.from_id_string(identity), extras=cls._parse_extras(extras))
 
   @classmethod
   def expand_path(cls, path):

diff --git a/pex/pex.py b/pex/pex.py
@@ -4,7 +4,6 @@
 from __future__ import absolute_import, print_function
 
 import os
-import subprocess
 import sys
 from contextlib import contextmanager
 from distutils import sysconfig
@@ -16,6 +15,7 @@
 from .common import die
 from .compatibility import exec_function
 from .environment import PEXEnvironment
+from .executor import Executor
 from .finders import get_entry_point_from_console_script, get_script_from_distributions
 from .interpreter import PythonInterpreter
 from .orderedset import OrderedSet
@@ -457,7 +457,7 @@ def cmdline(self, args=()):
     cmds.extend(args)
     return cmds
 
-  def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kw):
+  def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kwargs):
     """Run the PythonEnvironment in an interpreter in a subprocess.
 
     :keyword args: Additional arguments to be passed to the application being invoked by the
@@ -473,9 +473,12 @@ def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kw):
 
     cmdline = self.cmdline(args)
     TRACER.log('PEX.run invoking %s' % ' '.join(cmdline))
-    process = subprocess.Popen(
-        cmdline,
-        cwd=self._pex if with_chroot else os.getcwd(),
-        preexec_fn=os.setsid if setsid else None,
-        **kw)
+    process = Executor.open_process(cmdline,
+                                    cwd=self._pex if with_chroot else os.getcwd(),
+                                    preexec_fn=os.setsid if setsid else None,
+                                    # Explicitly don't redirect stdio for this execution.
+                                    stdin=None,
+                                    stdout=None,
+                                    stderr=None,
+                                    **kwargs)
     return process.wait() if blocking else process
diff --git a/pex/testing.py b/pex/testing.py
@@ -4,7 +4,6 @@
 import contextlib
 import os
 import random
-import subprocess
 import sys
 import tempfile
 import zipfile
@@ -14,6 +13,7 @@
 from .bin.pex import log, main
 from .common import safe_mkdir, safe_rmtree
 from .compatibility import nested
+from .executor import Executor
 from .installer import EggInstaller, Packager
 from .pex_builder import PEXBuilder
 from .util import DistributionHelper, named_temporary_file
@@ -212,14 +212,10 @@ def mock_logger(msg, v=None):
 
 
 # TODO(wickman) Why not PEX.run?
-def run_simple_pex(pex, args=(), env=None):
-  po = subprocess.Popen(
-      [sys.executable, pex] + list(args),
-      stdout=subprocess.PIPE,
-      stderr=subprocess.STDOUT,
-      env=env)
-  po.wait()
-  return po.stdout.read().replace(b'\r', b''), po.returncode
+def run_simple_pex(pex, args=(), env=None, stdin=None):
+  process = Executor.open_process([sys.executable, pex] + list(args), env=env, combined=True)
+  stdout, _ = process.communicate(input=stdin)
+  return stdout.replace(b'\r', b''), process.returncode
 
 
 def run_simple_pex_test(body, args=(), env=None, dists=None, coverage=False):

diff --git a/tests/test_executor.py b/tests/test_executor.py
@@ -0,0 +1,93 @@
+# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+import os
+
+import pytest
+from twitter.common.contextutil import temporary_dir
+
+from pex.executor import Executor
+
+
+TEST_EXECUTABLE = '/a/nonexistent/path/to/nowhere'
+TEST_CMD_LIST = [TEST_EXECUTABLE, '--version']
+TEST_CMD_STR = ' '.join(TEST_CMD_LIST)
+TEST_CMD_PARAMETERS = [TEST_CMD_LIST, TEST_CMD_STR]
+TEST_STDOUT = 'testing stdout'
+TEST_STDERR = 'testing stder'
+TEST_CODE = 3
+
+
+def test_executor_open_process_wait_return():
+  process = Executor.open_process('exit 8', shell=True)
+  exit_code = process.wait()
+  assert exit_code == 8
+
+
+def test_executor_open_process_communicate():
+  process = Executor.open_process(['/bin/echo', '-n', 'hello'])
+  stdout, stderr = process.communicate()
+  assert stdout.decode('utf-8') == 'hello'
+  assert stderr.decode('utf-8') == ''
+
+
+def test_executor_execute():
+  assert Executor.execute('/bin/echo -n stdout >&1', shell=True) == ('stdout', '')
+  assert Executor.execute('/bin/echo -n stderr >&2', shell=True) == ('', 'stderr')
+  assert Executor.execute(['/bin/echo', 'hello']) == ('hello\n', '')
+  assert Executor.execute(['/bin/echo', '-n', 'hello']) == ('hello', '')
+  assert Executor.execute('/bin/echo -n $HELLO', env={'HELLO': 'hey'}, shell=True) == ('hey', '')
+
+
+def test_executor_execute_zero():
+  Executor.execute('exit 0', shell=True)
+
+
+def test_executor_execute_stdio():
+  with temporary_dir() as tmp:
+    with open(os.path.join(tmp, 'stdout'), 'w+b') as fake_stdout:
+      with open(os.path.join(tmp, 'stderr'), 'w+b') as fake_stderr:
+        Executor.execute('/bin/echo -n TEST | tee /dev/stderr',
+                         shell=True,
+                         stdout=fake_stdout,
+                         stderr=fake_stderr)
+        fake_stdout.seek(0)
+        fake_stderr.seek(0)
+        assert fake_stdout.read().decode('utf-8') == 'TEST'
+        assert fake_stderr.read().decode('utf-8') == 'TEST'
+
+
+@pytest.mark.parametrize('testable', [Executor.open_process, Executor.execute])
+def test_executor_execute_not_found(testable):
+  with pytest.raises(Executor.ExecutableNotFound) as exc:
+    testable(TEST_CMD_LIST)
+  assert exc.value.executable == TEST_EXECUTABLE
+  assert exc.value.cmd == TEST_CMD_LIST
+
+
+@pytest.mark.parametrize('exit_code', [1, 127, -1])
+def test_executor_execute_nonzero(exit_code):
+  with pytest.raises(Executor.NonZeroExit) as exc:
+    Executor.execute('exit %s' % exit_code, shell=True)
+
+  if exit_code > 0:
+    assert exc.value.exit_code == exit_code
+
+
+@pytest.mark.parametrize('cmd', TEST_CMD_PARAMETERS)
+def test_executor_exceptions_executablenotfound(cmd):
+  exc_cause = OSError('test')
+  exc = Executor.ExecutableNotFound(cmd=cmd, exc=exc_cause)
+  assert exc.executable == TEST_EXECUTABLE
+  assert exc.cmd == cmd
+  assert exc.exc == exc_cause
+
+
+@pytest.mark.parametrize('cmd', TEST_CMD_PARAMETERS)
+def test_executor_exceptions_nonzeroexit(cmd):
+  exc = Executor.NonZeroExit(cmd=cmd, exit_code=TEST_CODE, stdout=TEST_STDOUT, stderr=TEST_STDERR)
+  assert exc.executable == TEST_EXECUTABLE
+  assert exc.cmd == cmd
+  assert exc.exit_code == TEST_CODE
+  assert exc.stdout == TEST_STDOUT
+  assert exc.stderr == TEST_STDERR