From 0a01eaf143f71df87748ce5b5bd1d84b0b165233 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Wed, 12 May 2021 11:47:46 -0700 Subject: [PATCH] Fix PEX_ROOT leak run and repl goals. (#12066) Previously we'd leak `.cache/pex_root` to the workspace. Beyond making it easier to (mistakenly) corrupt the Pex cache for interactive Pex processes it also could degrade performance of those processes since this was not a symlink to the `pex_root` named cache used by sandboxed processes. Force user's of PexEnvironment methods that support setting up a process to declare what type of process they are setting up by moving those methods to CompletePexEnvironment with one concrete implementation for sandboxed processes and one for workspace processes. Fixes #12055 --- .../backend/codegen/protobuf/python/rules.py | 6 +- src/python/pants/backend/python/goals/repl.py | 8 +- .../backend/python/goals/run_pex_binary.py | 4 +- .../goals/run_pex_binary_integration_test.py | 34 +++++ .../pants/backend/python/util_rules/pex.py | 22 ++-- .../backend/python/util_rules/pex_cli.py | 6 +- .../python/util_rules/pex_environment.py | 123 ++++++++++++------ 7 files changed, 144 insertions(+), 59 deletions(-) diff --git a/src/python/pants/backend/codegen/protobuf/python/rules.py b/src/python/pants/backend/codegen/protobuf/python/rules.py index 8d0a0f4ebe4..96287681492 100644 --- a/src/python/pants/backend/codegen/protobuf/python/rules.py +++ b/src/python/pants/backend/codegen/protobuf/python/rules.py @@ -21,7 +21,7 @@ VenvPex, VenvPexRequest, ) -from pants.backend.python.util_rules.pex_environment import PexEnvironment +from pants.backend.python.util_rules.pex_environment import SandboxPexEnvironment from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest from pants.core.util_rules.source_files import SourceFilesRequest from pants.core.util_rules.stripped_source_files import StrippedSourceFiles @@ -61,7 +61,7 @@ async def generate_python_from_protobuf( grpc_python_plugin: GrpcPythonPlugin, python_protobuf_subsystem: PythonProtobufSubsystem, python_protobuf_mypy_plugin: PythonProtobufMypyPlugin, - pex_environment: PexEnvironment, + pex_environment: SandboxPexEnvironment, ) -> GeneratedSources: download_protoc_request = Get( DownloadedExternalTool, ExternalToolRequest, protoc.get_request(Platform.current) @@ -193,7 +193,7 @@ async def generate_python_from_protobuf( description=f"Generating Python sources from {request.protocol_target.address}.", level=LogLevel.DEBUG, output_directories=(output_dir,), - append_only_caches=pex_environment.append_only_caches(), + append_only_caches=pex_environment.append_only_caches, ), ) diff --git a/src/python/pants/backend/python/goals/repl.py b/src/python/pants/backend/python/goals/repl.py index e1bb22f256c..d3fe859ba7c 100644 --- a/src/python/pants/backend/python/goals/repl.py +++ b/src/python/pants/backend/python/goals/repl.py @@ -3,7 +3,7 @@ from pants.backend.python.subsystems.ipython import IPython from pants.backend.python.util_rules.pex import Pex, PexRequest, PexRequirements -from pants.backend.python.util_rules.pex_environment import PexEnvironment +from pants.backend.python.util_rules.pex_environment import WorkspacePexEnvironment from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest from pants.backend.python.util_rules.python_sources import ( PythonSourceFiles, @@ -21,7 +21,9 @@ class PythonRepl(ReplImplementation): @rule(level=LogLevel.DEBUG) -async def create_python_repl_request(repl: PythonRepl, pex_env: PexEnvironment) -> ReplRequest: +async def create_python_repl_request( + repl: PythonRepl, pex_env: WorkspacePexEnvironment +) -> ReplRequest: requirements_request = Get( Pex, PexFromTargetsRequest, @@ -56,7 +58,7 @@ class IPythonRepl(ReplImplementation): @rule(level=LogLevel.DEBUG) async def create_ipython_repl_request( - repl: IPythonRepl, ipython: IPython, pex_env: PexEnvironment + repl: IPythonRepl, ipython: IPython, pex_env: WorkspacePexEnvironment ) -> ReplRequest: # Note that we get an intermediate PexRequest here (instead of going straight to a Pex) # so that we can get the interpreter constraints for use in ipython_request. diff --git a/src/python/pants/backend/python/goals/run_pex_binary.py b/src/python/pants/backend/python/goals/run_pex_binary.py index 2d85c50f62c..f55c8ee135a 100644 --- a/src/python/pants/backend/python/goals/run_pex_binary.py +++ b/src/python/pants/backend/python/goals/run_pex_binary.py @@ -10,7 +10,7 @@ ResolvePexEntryPointRequest, ) from pants.backend.python.util_rules.pex import Pex, PexRequest -from pants.backend.python.util_rules.pex_environment import PexEnvironment +from pants.backend.python.util_rules.pex_environment import WorkspacePexEnvironment from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest from pants.backend.python.util_rules.python_sources import ( PythonSourceFiles, @@ -28,7 +28,7 @@ async def create_pex_binary_run_request( field_set: PexBinaryFieldSet, pex_binary_defaults: PexBinaryDefaults, - pex_env: PexEnvironment, + pex_env: WorkspacePexEnvironment, ) -> RunRequest: entry_point, transitive_targets = await MultiGet( Get( diff --git a/src/python/pants/backend/python/goals/run_pex_binary_integration_test.py b/src/python/pants/backend/python/goals/run_pex_binary_integration_test.py index 61686d48f24..a5ded9718b6 100644 --- a/src/python/pants/backend/python/goals/run_pex_binary_integration_test.py +++ b/src/python/pants/backend/python/goals/run_pex_binary_integration_test.py @@ -2,6 +2,7 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import json +import os from textwrap import dedent from typing import Optional @@ -124,3 +125,36 @@ def test_no_strip_pex_env_issues_12057() -> None: ] result = run_pants(args) assert result.exit_code == 42, result.stderr + + +def test_no_leak_pex_root_issues_12055() -> None: + read_config_result = run_pants(["help-all"]) + read_config_result.assert_success() + config_data = json.loads(read_config_result.stdout) + global_advanced_options = { + option["config_key"]: [ + ranked_value["value"] for ranked_value in option["value_history"]["ranked_values"] + ][-1] + for option in config_data["scope_to_help_info"][""]["advanced"] + } + named_caches_dir = global_advanced_options["named_caches_dir"] + + sources = { + "src/app.py": "import os; print(os.environ['PEX_ROOT'])", + "src/BUILD": dedent( + """\ + python_library(name="lib") + pex_binary(entry_point="app.py") + """ + ), + } + with setup_tmpdir(sources) as tmpdir: + args = [ + "--backend-packages=pants.backend.python", + f"--source-root-patterns=['/{tmpdir}/src']", + "run", + f"{tmpdir}/src/app.py", + ] + result = run_pants(args) + result.assert_success() + assert os.path.join(named_caches_dir, "pex_root") == result.stdout.strip() diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py index 8d8bcc63366..69f97c978ea 100644 --- a/src/python/pants/backend/python/util_rules/pex.py +++ b/src/python/pants/backend/python/util_rules/pex.py @@ -23,9 +23,9 @@ from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints from pants.backend.python.util_rules.pex_cli import PexCliProcess, PexPEX from pants.backend.python.util_rules.pex_environment import ( - PexEnvironment, PexRuntimeEnvironment, PythonExecutable, + SandboxPexEnvironment, ) from pants.engine.collection import Collection, DeduplicatedCollection from pants.engine.engine_aware import EngineAwareParameter @@ -459,7 +459,7 @@ class VenvScriptWriter: def _create_venv_script( self, bash: BashBinary, - pex_environment: PexEnvironment, + pex_environment: SandboxPexEnvironment, *, script_path: PurePath, venv_executable: PurePath, @@ -503,14 +503,16 @@ def _create_venv_script( content=FileContent(path=str(script_path), content=script.encode(), is_executable=True), ) - def exe(self, bash: BashBinary, pex_environment: PexEnvironment) -> VenvScript: + def exe(self, bash: BashBinary, pex_environment: SandboxPexEnvironment) -> VenvScript: """Writes a safe shim for the venv's executable `pex` script.""" script_path = PurePath(f"{self.pex.name}_pex_shim.sh") return self._create_venv_script( bash, pex_environment, script_path=script_path, venv_executable=self.venv_dir / "pex" ) - def bin(self, bash: BashBinary, pex_environment: PexEnvironment, name: str) -> VenvScript: + def bin( + self, bash: BashBinary, pex_environment: SandboxPexEnvironment, name: str + ) -> VenvScript: """Writes a safe shim for an executable or script in the venv's `bin` directory.""" script_path = PurePath(f"{self.pex.name}_bin_{name}_shim.sh") return self._create_venv_script( @@ -520,7 +522,7 @@ def bin(self, bash: BashBinary, pex_environment: PexEnvironment, name: str) -> V venv_executable=self.venv_dir / "bin" / name, ) - def python(self, bash: BashBinary, pex_environment: PexEnvironment) -> VenvScript: + def python(self, bash: BashBinary, pex_environment: SandboxPexEnvironment) -> VenvScript: """Writes a safe shim for the venv's python binary.""" return self.bin(bash, pex_environment, "python") @@ -558,7 +560,7 @@ def wrap_venv_prex_request(pex_request: PexRequest) -> VenvPexRequest: @rule async def create_venv_pex( - request: VenvPexRequest, bash: BashBinary, pex_environment: PexEnvironment + request: VenvPexRequest, bash: BashBinary, pex_environment: SandboxPexEnvironment ) -> VenvPex: # VenvPex is motivated by improving performance of Python tools by eliminating traditional PEX # file startup overhead. @@ -707,7 +709,7 @@ def __init__( @rule -async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment) -> Process: +async def setup_pex_process(request: PexProcess, pex_environment: SandboxPexEnvironment) -> Process: pex = request.pex argv = pex_environment.create_argv(pex.name, *request.argv, python=pex.python) env = { @@ -727,7 +729,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment env=env, output_files=request.output_files, output_directories=request.output_directories, - append_only_caches=pex_environment.append_only_caches(), + append_only_caches=pex_environment.append_only_caches, timeout_seconds=request.timeout_seconds, execution_slot_variable=request.execution_slot_variable, cache_scope=request.cache_scope, @@ -779,7 +781,7 @@ def __init__( @rule async def setup_venv_pex_process( - request: VenvPexProcess, pex_environment: PexEnvironment + request: VenvPexProcess, pex_environment: SandboxPexEnvironment ) -> Process: venv_pex = request.venv_pex argv = (venv_pex.pex.argv0, *request.argv) @@ -796,7 +798,7 @@ async def setup_venv_pex_process( env=request.extra_env, output_files=request.output_files, output_directories=request.output_directories, - append_only_caches=pex_environment.append_only_caches(), + append_only_caches=pex_environment.append_only_caches, timeout_seconds=request.timeout_seconds, execution_slot_variable=request.execution_slot_variable, cache_scope=request.cache_scope, diff --git a/src/python/pants/backend/python/util_rules/pex_cli.py b/src/python/pants/backend/python/util_rules/pex_cli.py index 75adba56275..1795f60f375 100644 --- a/src/python/pants/backend/python/util_rules/pex_cli.py +++ b/src/python/pants/backend/python/util_rules/pex_cli.py @@ -10,9 +10,9 @@ from pants.backend.python.subsystems.python_native_code import PythonNativeCode from pants.backend.python.util_rules import pex_environment from pants.backend.python.util_rules.pex_environment import ( - PexEnvironment, PexRuntimeEnvironment, PythonExecutable, + SandboxPexEnvironment, ) from pants.core.util_rules import external_tool from pants.core.util_rules.external_tool import ( @@ -113,7 +113,7 @@ async def download_pex_pex(pex_binary: PexBinary) -> PexPEX: async def setup_pex_cli_process( request: PexCliProcess, pex_binary: PexPEX, - pex_env: PexEnvironment, + pex_env: SandboxPexEnvironment, python_native_code: PythonNativeCode, global_options: GlobalOptions, pex_runtime_env: PexRuntimeEnvironment, @@ -179,7 +179,7 @@ async def setup_pex_cli_process( env=env, output_files=request.output_files, output_directories=request.output_directories, - append_only_caches=pex_env.append_only_caches(), + append_only_caches=pex_env.append_only_caches, level=request.level, cache_scope=request.cache_scope, ) diff --git a/src/python/pants/backend/python/util_rules/pex_environment.py b/src/python/pants/backend/python/util_rules/pex_environment.py index a96d72f4cb5..6b422ef6956 100644 --- a/src/python/pants/backend/python/util_rules/pex_environment.py +++ b/src/python/pants/backend/python/util_rules/pex_environment.py @@ -1,9 +1,11 @@ # Copyright 2019 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). +from __future__ import annotations + import os from dataclasses import dataclass -from pathlib import PurePath +from pathlib import Path, PurePath from textwrap import dedent from typing import Mapping, Optional, Tuple, cast @@ -106,45 +108,9 @@ class PexEnvironment(EngineAwareReturnType): path: Tuple[str, ...] interpreter_search_paths: Tuple[str, ...] subprocess_environment_dict: FrozenDict[str, str] + named_caches_dir: PurePath bootstrap_python: Optional[PythonExecutable] = None - def create_argv( - self, pex_filepath: str, *args: str, python: Optional[PythonExecutable] = None - ) -> Tuple[str, ...]: - python = python or self.bootstrap_python - if python: - return (python.path, pex_filepath, *args) - if os.path.basename(pex_filepath) == pex_filepath: - return (f"./{pex_filepath}", *args) - return (pex_filepath, *args) - - def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]: - """The environment to use for running anything with PEX. - - If the Process is run with a pre-selected Python interpreter, set `python_configured=True` - to avoid PEX from trying to find a new interpreter. - """ - d = dict( - PATH=create_path_env_var(self.path), - PEX_INHERIT_PATH="false", - PEX_IGNORE_RCFILES="true", - PEX_ROOT=str(self.pex_root), - **self.subprocess_environment_dict, - ) - # NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been - # pre-selected by Pants. Otherwise, Pex would inadvertently try to find another interpreter - # when running PEXes. (Creating a PEX will ignore this env var in favor of `--python-path`.) - if not python_configured: - d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths) - return d - - @property - def pex_root(self) -> PurePath: - return PurePath(".cache/pex_root") - - def append_only_caches(self) -> Mapping[str, str]: - return {"pex_root": str(self.pex_root)} - def level(self) -> LogLevel: return LogLevel.DEBUG if self.bootstrap_python else LogLevel.WARN @@ -233,9 +199,90 @@ def first_python_binary() -> Optional[PythonExecutable]: python_setup.interpreter_search_paths(pex_relevant_environment) ), subprocess_environment_dict=subprocess_env_vars.vars, + # TODO: This path normalization is duplicated with `engine_initializer.py`. How can we do + # the normalization only once, via the options system? + named_caches_dir=Path(global_options.options.named_caches_dir).resolve(), bootstrap_python=first_python_binary(), ) +@dataclass(frozen=True) +class CompletePexEnvironment: + _pex_environment: PexEnvironment + pex_root: PurePath + + _PEX_ROOT_DIRNAME = "pex_root" + + @property + def interpreter_search_paths(self) -> Tuple[str, ...]: + return self._pex_environment.interpreter_search_paths + + def create_argv( + self, pex_filepath: str, *args: str, python: Optional[PythonExecutable] = None + ) -> Tuple[str, ...]: + python = python or self._pex_environment.bootstrap_python + if python: + return (python.path, pex_filepath, *args) + if os.path.basename(pex_filepath) == pex_filepath: + return (f"./{pex_filepath}", *args) + return (pex_filepath, *args) + + def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]: + """The environment to use for running anything with PEX. + + If the Process is run with a pre-selected Python interpreter, set `python_configured=True` + to avoid PEX from trying to find a new interpreter. + """ + d = dict( + PATH=create_path_env_var(self._pex_environment.path), + PEX_INHERIT_PATH="false", + PEX_IGNORE_RCFILES="true", + PEX_ROOT=str(self.pex_root), + **self._pex_environment.subprocess_environment_dict, + ) + # NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been + # pre-selected by Pants. Otherwise, Pex would inadvertently try to find another interpreter + # when running PEXes. (Creating a PEX will ignore this env var in favor of `--python-path`.) + if not python_configured: + d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths) + return d + + +@dataclass(frozen=True) +class SandboxPexEnvironment(CompletePexEnvironment): + append_only_caches: FrozenDict[str, str] + + @classmethod + def create(cls, pex_environment) -> SandboxPexEnvironment: + pex_root = PurePath(".cache") / cls._PEX_ROOT_DIRNAME + return cls( + _pex_environment=pex_environment, + pex_root=pex_root, + append_only_caches=FrozenDict({cls._PEX_ROOT_DIRNAME: str(pex_root)}), + ) + + +class WorkspacePexEnvironment(CompletePexEnvironment): + @classmethod + def create(cls, pex_environment: PexEnvironment) -> WorkspacePexEnvironment: + # N.B.: When running in the workspace the engine doesn't offer an append_only_caches + # service to setup a symlink to our named cache for us. As such, we point the PEX_ROOT + # directly at the underlying append only cache in that case to re-use results there and + # to keep the workspace from being dirtied by the creation of a new Pex cache rooted + # there. + pex_root = pex_environment.named_caches_dir / cls._PEX_ROOT_DIRNAME + return cls(_pex_environment=pex_environment, pex_root=pex_root) + + +@rule +def sandbox_pex_environment(pex_environment: PexEnvironment) -> SandboxPexEnvironment: + return SandboxPexEnvironment.create(pex_environment) + + +@rule +def workspace_pex_environment(pex_environment: PexEnvironment) -> WorkspacePexEnvironment: + return WorkspacePexEnvironment.create(pex_environment) + + def rules(): return [*collect_rules(), *process.rules(), *subprocess_environment.rules()]