From 802ca3265bb08f69a90ed54463bbc9e22d410136 Mon Sep 17 00:00:00 2001 From: Joshua Cannon Date: Mon, 5 Dec 2022 14:47:15 -0600 Subject: [PATCH] Run python sources with a VenvPex (#17700) This change makes it so we use a `VenvPex` to run Python sources, which is a speed boost (I measure a gain of about ~500ms, which is also quoted in `pex.py`). In order to make this work (specifically ensuring we don't revert the fix for #12055) we now have to weave the complete pex environment through to `VenvPexRequest`. --- .../backend/codegen/protobuf/python/rules.py | 14 ++-- .../pants/backend/python/goals/run_helper.py | 69 +++++++++---------- .../backend/python/goals/run_python_source.py | 2 - .../run_python_source_integration_test.py | 32 ++++++++- .../pants/backend/python/lint/pylint/rules.py | 3 + .../pants/backend/python/util_rules/pex.py | 17 +++-- 6 files changed, 89 insertions(+), 48 deletions(-) diff --git a/src/python/pants/backend/codegen/protobuf/python/rules.py b/src/python/pants/backend/codegen/protobuf/python/rules.py index 5c1ba1f04ce..fdbf1a69dbc 100644 --- a/src/python/pants/backend/codegen/protobuf/python/rules.py +++ b/src/python/pants/backend/codegen/protobuf/python/rules.py @@ -99,12 +99,17 @@ async def generate_python_from_protobuf( protoc_gen_mypy_script = "protoc-gen-mypy" protoc_gen_mypy_grpc_script = "protoc-gen-mypy_grpc" mypy_pex = None + complete_pex_env = pex_environment.in_sandbox(working_directory=None) if python_protobuf_subsystem.mypy_plugin: mypy_request = python_protobuf_mypy_plugin.to_pex_request() mypy_pex = await Get( VenvPex, - VenvPexRequest(bin_names=[protoc_gen_mypy_script], pex_request=mypy_request), + VenvPexRequest( + pex_request=mypy_request, + complete_pex_env=complete_pex_env, + bin_names=[protoc_gen_mypy_script], + ), ) if request.protocol_target.get(ProtobufGrpcToggleField).value: @@ -117,8 +122,9 @@ async def generate_python_from_protobuf( mypy_pex = await Get( VenvPex, VenvPexRequest( - bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script], pex_request=mypy_request, + complete_pex_env=complete_pex_env, + bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script], ), ) @@ -178,9 +184,7 @@ async def generate_python_from_protobuf( description=f"Generating Python sources from {request.protocol_target.address}.", level=LogLevel.DEBUG, output_directories=(output_dir,), - append_only_caches=pex_environment.in_sandbox( - working_directory=None - ).append_only_caches, + append_only_caches=complete_pex_env.append_only_caches, ), ) diff --git a/src/python/pants/backend/python/goals/run_helper.py b/src/python/pants/backend/python/goals/run_helper.py index 152f7ac5492..299e4592e4a 100644 --- a/src/python/pants/backend/python/goals/run_helper.py +++ b/src/python/pants/backend/python/goals/run_helper.py @@ -2,9 +2,10 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). from __future__ import annotations +import dataclasses import os import textwrap -from typing import Iterable, Optional +from typing import Optional from pants.backend.python.subsystems.debugpy import DebugPy from pants.backend.python.target_types import ( @@ -15,7 +16,7 @@ ) from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints from pants.backend.python.util_rules.local_dists import LocalDistsPex, LocalDistsPexRequest -from pants.backend.python.util_rules.pex import Pex, PexRequest +from pants.backend.python.util_rules.pex import Pex, PexRequest, VenvPex, VenvPexRequest from pants.backend.python.util_rules.pex_environment import PexEnvironment from pants.backend.python.util_rules.pex_from_targets import ( InterpreterConstraintsRequest, @@ -45,10 +46,10 @@ async def _create_python_source_run_request( pex_env: PexEnvironment, run_in_sandbox: bool, console_script: Optional[ConsoleScript] = None, - additional_pex_args: Iterable[str] = (), ) -> RunRequest: addresses = [address] - entry_point, transitive_targets = await MultiGet( + interpreter_constraints, entry_point, transitive_targets = await MultiGet( + Get(InterpreterConstraints, InterpreterConstraintsRequest(addresses)), Get( ResolvedPexEntryPoint, ResolvePexEntryPointRequest(entry_point_field), @@ -56,35 +57,33 @@ async def _create_python_source_run_request( Get(TransitiveTargets, TransitiveTargetsRequest(addresses)), ) - interpreter_constraints = await Get( - InterpreterConstraints, InterpreterConstraintsRequest(addresses) - ) - pex_filename = ( address.generated_name.replace(".", "_") if address.generated_name else address.target_name ) - pex_get = Get( - Pex, - PexFromTargetsRequest( - addresses, - output_filename=f"{pex_filename}.pex", - internal_only=True, - include_source_files=False, - # `PEX_EXTRA_SYS_PATH` should contain this entry_point's module. - main=console_script or entry_point.val, - additional_args=( - *additional_pex_args, - # N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH - # below, it's important for any app that re-executes itself that these environment - # variables are not stripped. - "--no-strip-pex-env", + + pex_request, sources = await MultiGet( + Get( + PexRequest, + PexFromTargetsRequest( + addresses, + output_filename=f"{pex_filename}.pex", + internal_only=True, + include_source_files=False, + # `PEX_EXTRA_SYS_PATH` should contain this entry_point's module. + main=console_script or entry_point.val, + additional_args=( + # N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH + # below, it's important for any app that re-executes itself that these environment + # variables are not stripped. + "--no-strip-pex-env", + ), ), ), + Get( + PythonSourceFiles, + PythonSourceFilesRequest(transitive_targets.closure, include_files=True), + ), ) - sources_get = Get( - PythonSourceFiles, PythonSourceFilesRequest(transitive_targets.closure, include_files=True) - ) - pex, sources = await MultiGet(pex_get, sources_get) local_dists = await Get( LocalDistsPex, @@ -95,10 +94,14 @@ async def _create_python_source_run_request( sources=sources, ), ) + pex_request = dataclasses.replace( + pex_request, pex_path=(*pex_request.pex_path, local_dists.pex) + ) + complete_pex_environment = pex_env.in_workspace() + venv_pex = await Get(VenvPex, VenvPexRequest(pex_request, complete_pex_environment)) input_digests = [ - pex.digest, - local_dists.pex.digest, + venv_pex.digest, # Note regarding not-in-sandbox mode: You might think that the sources don't need to be copied # into the chroot when using inline sources. But they do, because some of them might be # codegenned, and those won't exist in the inline source tree. Rather than incurring the @@ -109,9 +112,6 @@ async def _create_python_source_run_request( ] merged_digest = await Get(Digest, MergeDigests(input_digests)) - complete_pex_env = pex_env.in_workspace() - args = complete_pex_env.create_argv(_in_chroot(pex.name), python=pex.python) - chrooted_source_roots = [_in_chroot(sr) for sr in sources.source_roots] # The order here is important: we want the in-repo sources to take precedence over their # copies in the sandbox (see above for why those copies exist even in non-sandboxed mode). @@ -120,14 +120,13 @@ async def _create_python_source_run_request( *chrooted_source_roots, ] extra_env = { - **pex_env.in_workspace().environment_dict(python_configured=pex.python is not None), - "PEX_PATH": _in_chroot(local_dists.pex.name), + **complete_pex_environment.environment_dict(python_configured=venv_pex.python is not None), "PEX_EXTRA_SYS_PATH": os.pathsep.join(source_roots), } return RunRequest( digest=merged_digest, - args=args, + args=[_in_chroot(venv_pex.pex.argv0)], extra_env=extra_env, ) diff --git a/src/python/pants/backend/python/goals/run_python_source.py b/src/python/pants/backend/python/goals/run_python_source.py index 24b729c2f46..f8dce7f599c 100644 --- a/src/python/pants/backend/python/goals/run_python_source.py +++ b/src/python/pants/backend/python/goals/run_python_source.py @@ -44,8 +44,6 @@ async def create_python_source_run_request( entry_point_field=PexEntryPointField(field_set.source.value, field_set.address), pex_env=pex_env, run_in_sandbox=run_goal_use_sandbox, - # Setting --venv is kosher because the PEX we create is just for the thirdparty deps. - additional_pex_args=["--venv"], ) diff --git a/src/python/pants/backend/python/goals/run_python_source_integration_test.py b/src/python/pants/backend/python/goals/run_python_source_integration_test.py index 03adf1693ea..3c1608e29eb 100644 --- a/src/python/pants/backend/python/goals/run_python_source_integration_test.py +++ b/src/python/pants/backend/python/goals/run_python_source_integration_test.py @@ -222,4 +222,34 @@ def test_local_dist() -> None: f"{tmpdir}/foo/main.py", ] result = run_pants(args) - assert result.stdout == "LOCAL DIST\n" + assert result.stdout == "LOCAL DIST\n", result.stderr + + +def test_runs_in_venv() -> None: + # NB: We aren't just testing an implementation detail, users can and should expect their code to + # be run just as if they ran their code in a virtualenv (as is common in the Python ecosystem). + sources = { + "src/app.py": dedent( + """\ + import os + import sys + + if __name__ == "__main__": + sys.exit(0 if "VIRTUAL_ENV" in os.environ else 1) + """ + ), + "src/BUILD": dedent( + """\ + python_sources(name="lib") + """ + ), + } + with setup_tmpdir(sources) as tmpdir: + args = [ + "--backend-packages=pants.backend.python", + f"--source-root-patterns=['/{tmpdir}/src']", + "run", + f"{tmpdir}/src/app.py", + ] + result = run_pants(args) + assert result.exit_code == 0 diff --git a/src/python/pants/backend/python/lint/pylint/rules.py b/src/python/pants/backend/python/lint/pylint/rules.py index 9d858dac9c3..b7a485f3833 100644 --- a/src/python/pants/backend/python/lint/pylint/rules.py +++ b/src/python/pants/backend/python/lint/pylint/rules.py @@ -27,6 +27,7 @@ VenvPexProcess, VenvPexRequest, ) +from pants.backend.python.util_rules.pex_environment import PexEnvironment from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest from pants.backend.python.util_rules.python_sources import ( PythonSourceFiles, @@ -118,6 +119,7 @@ async def run_pylint( request: PylintRequest.Batch[PylintFieldSet, PartitionMetadata], pylint: Pylint, first_party_plugins: PylintFirstPartyPlugins, + pex_environment: PexEnvironment, ) -> LintResult: assert request.partition_metadata is not None @@ -181,6 +183,7 @@ async def run_pylint( internal_only=True, pex_path=[pylint_pex, requirements_pex], ), + pex_environment.in_sandbox(working_directory=None), # Astroid < 2.9.1 had a regression that prevented the use of symlinks: # https://github.com/PyCQA/pylint/issues/1470 site_packages_copies=(astroid_info.version < packaging.version.Version("2.9.1")), diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py index fb541d20ba0..a711aafe3c0 100644 --- a/src/python/pants/backend/python/util_rules/pex.py +++ b/src/python/pants/backend/python/util_rules/pex.py @@ -667,13 +667,12 @@ class VenvScriptWriter: @classmethod def create( - cls, pex_environment: PexEnvironment, pex: Pex, venv_rel_dir: PurePath + cls, complete_pex_env: CompletePexEnvironment, pex: Pex, venv_rel_dir: PurePath ) -> VenvScriptWriter: # N.B.: We don't know the working directory that will be used in any given # invocation of the venv scripts; so we deal with working_directory once in an # `adjust_relative_paths` function inside the script to save rule authors from having to do # CWD offset math in every rule for all the relative paths their process depends on. - complete_pex_env = pex_environment.in_sandbox(working_directory=None) venv_dir = complete_pex_env.pex_root / venv_rel_dir return cls(complete_pex_env=complete_pex_env, pex=pex, venv_dir=venv_dir) @@ -795,18 +794,21 @@ class VenvPex: @dataclass(unsafe_hash=True) class VenvPexRequest: pex_request: PexRequest + complete_pex_env: CompletePexEnvironment bin_names: tuple[str, ...] = () site_packages_copies: bool = False def __init__( self, pex_request: PexRequest, + complete_pex_env: CompletePexEnvironment, bin_names: Iterable[str] = (), site_packages_copies: bool = False, ) -> None: """A request for a PEX that runs in a venv and optionally exposes select venv `bin` scripts. :param pex_request: The details of the desired PEX. + :param complete_pex_env: The complete PEX environment the pex will be run in. :param bin_names: The names of venv `bin` scripts to expose for execution. :param site_packages_copies: `True` to use copies (hardlinks when possible) of PEX dependencies when installing them in the venv site-packages directory. By default this @@ -814,14 +816,17 @@ def __init__( but results in a non-standard venv structure that does trip up some libraries. """ self.pex_request = pex_request + self.complete_pex_env = complete_pex_env self.bin_names = tuple(bin_names) self.site_packages_copies = site_packages_copies @rule -def wrap_venv_prex_request(pex_request: PexRequest) -> VenvPexRequest: +def wrap_venv_prex_request( + pex_request: PexRequest, pex_environment: PexEnvironment +) -> VenvPexRequest: # Allow creating a VenvPex from a plain PexRequest when no extra bin scripts need to be exposed. - return VenvPexRequest(pex_request) + return VenvPexRequest(pex_request, pex_environment.in_sandbox(working_directory=None)) @rule @@ -877,7 +882,9 @@ async def create_venv_pex( venv_rel_dir = abs_pex_path.relative_to(abs_pex_root).parent venv_script_writer = VenvScriptWriter.create( - pex_environment=pex_environment, pex=venv_pex_result.create_pex(), venv_rel_dir=venv_rel_dir + complete_pex_env=request.complete_pex_env, + pex=venv_pex_result.create_pex(), + venv_rel_dir=venv_rel_dir, ) pex = venv_script_writer.exe(bash) python = venv_script_writer.python(bash)