Skip to content

Commit

Permalink
Fix PEX_ROOT leak run and repl goals. (#12066)
Browse files Browse the repository at this point in the history
Previously we'd leak `.cache/pex_root` to the workspace. Beyond making it
easier to (mistakenly) corrupt the Pex cache for interactive Pex processes
it also could degrade performance of those processes since this was not
a symlink to the `pex_root` named cache used by sandboxed processes.

Force user's of PexEnvironment methods that support setting up a process
to declare what type of process they are setting up by moving those
methods to CompletePexEnvironment with one concrete implementation for
sandboxed processes and one for workspace processes.

Fixes #12055
  • Loading branch information
jsirois authored May 12, 2021
1 parent 1d4dc1b commit 0a01eaf
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 59 deletions.
6 changes: 3 additions & 3 deletions src/python/pants/backend/codegen/protobuf/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
VenvPex,
VenvPexRequest,
)
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_environment import SandboxPexEnvironment
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFilesRequest
from pants.core.util_rules.stripped_source_files import StrippedSourceFiles
Expand Down Expand Up @@ -61,7 +61,7 @@ async def generate_python_from_protobuf(
grpc_python_plugin: GrpcPythonPlugin,
python_protobuf_subsystem: PythonProtobufSubsystem,
python_protobuf_mypy_plugin: PythonProtobufMypyPlugin,
pex_environment: PexEnvironment,
pex_environment: SandboxPexEnvironment,
) -> GeneratedSources:
download_protoc_request = Get(
DownloadedExternalTool, ExternalToolRequest, protoc.get_request(Platform.current)
Expand Down Expand Up @@ -193,7 +193,7 @@ async def generate_python_from_protobuf(
description=f"Generating Python sources from {request.protocol_target.address}.",
level=LogLevel.DEBUG,
output_directories=(output_dir,),
append_only_caches=pex_environment.append_only_caches(),
append_only_caches=pex_environment.append_only_caches,
),
)

Expand Down
8 changes: 5 additions & 3 deletions src/python/pants/backend/python/goals/repl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pants.backend.python.subsystems.ipython import IPython
from pants.backend.python.util_rules.pex import Pex, PexRequest, PexRequirements
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_environment import WorkspacePexEnvironment
from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest
from pants.backend.python.util_rules.python_sources import (
PythonSourceFiles,
Expand All @@ -21,7 +21,9 @@ class PythonRepl(ReplImplementation):


@rule(level=LogLevel.DEBUG)
async def create_python_repl_request(repl: PythonRepl, pex_env: PexEnvironment) -> ReplRequest:
async def create_python_repl_request(
repl: PythonRepl, pex_env: WorkspacePexEnvironment
) -> ReplRequest:
requirements_request = Get(
Pex,
PexFromTargetsRequest,
Expand Down Expand Up @@ -56,7 +58,7 @@ class IPythonRepl(ReplImplementation):

@rule(level=LogLevel.DEBUG)
async def create_ipython_repl_request(
repl: IPythonRepl, ipython: IPython, pex_env: PexEnvironment
repl: IPythonRepl, ipython: IPython, pex_env: WorkspacePexEnvironment
) -> ReplRequest:
# Note that we get an intermediate PexRequest here (instead of going straight to a Pex)
# so that we can get the interpreter constraints for use in ipython_request.
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/python/goals/run_pex_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
ResolvePexEntryPointRequest,
)
from pants.backend.python.util_rules.pex import Pex, PexRequest
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_environment import WorkspacePexEnvironment
from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest
from pants.backend.python.util_rules.python_sources import (
PythonSourceFiles,
Expand All @@ -28,7 +28,7 @@
async def create_pex_binary_run_request(
field_set: PexBinaryFieldSet,
pex_binary_defaults: PexBinaryDefaults,
pex_env: PexEnvironment,
pex_env: WorkspacePexEnvironment,
) -> RunRequest:
entry_point, transitive_targets = await MultiGet(
Get(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import json
import os
from textwrap import dedent
from typing import Optional

Expand Down Expand Up @@ -124,3 +125,36 @@ def test_no_strip_pex_env_issues_12057() -> None:
]
result = run_pants(args)
assert result.exit_code == 42, result.stderr


def test_no_leak_pex_root_issues_12055() -> None:
read_config_result = run_pants(["help-all"])
read_config_result.assert_success()
config_data = json.loads(read_config_result.stdout)
global_advanced_options = {
option["config_key"]: [
ranked_value["value"] for ranked_value in option["value_history"]["ranked_values"]
][-1]
for option in config_data["scope_to_help_info"][""]["advanced"]
}
named_caches_dir = global_advanced_options["named_caches_dir"]

sources = {
"src/app.py": "import os; print(os.environ['PEX_ROOT'])",
"src/BUILD": dedent(
"""\
python_library(name="lib")
pex_binary(entry_point="app.py")
"""
),
}
with setup_tmpdir(sources) as tmpdir:
args = [
"--backend-packages=pants.backend.python",
f"--source-root-patterns=['/{tmpdir}/src']",
"run",
f"{tmpdir}/src/app.py",
]
result = run_pants(args)
result.assert_success()
assert os.path.join(named_caches_dir, "pex_root") == result.stdout.strip()
22 changes: 12 additions & 10 deletions src/python/pants/backend/python/util_rules/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.pex_cli import PexCliProcess, PexPEX
from pants.backend.python.util_rules.pex_environment import (
PexEnvironment,
PexRuntimeEnvironment,
PythonExecutable,
SandboxPexEnvironment,
)
from pants.engine.collection import Collection, DeduplicatedCollection
from pants.engine.engine_aware import EngineAwareParameter
Expand Down Expand Up @@ -459,7 +459,7 @@ class VenvScriptWriter:
def _create_venv_script(
self,
bash: BashBinary,
pex_environment: PexEnvironment,
pex_environment: SandboxPexEnvironment,
*,
script_path: PurePath,
venv_executable: PurePath,
Expand Down Expand Up @@ -503,14 +503,16 @@ def _create_venv_script(
content=FileContent(path=str(script_path), content=script.encode(), is_executable=True),
)

def exe(self, bash: BashBinary, pex_environment: PexEnvironment) -> VenvScript:
def exe(self, bash: BashBinary, pex_environment: SandboxPexEnvironment) -> VenvScript:
"""Writes a safe shim for the venv's executable `pex` script."""
script_path = PurePath(f"{self.pex.name}_pex_shim.sh")
return self._create_venv_script(
bash, pex_environment, script_path=script_path, venv_executable=self.venv_dir / "pex"
)

def bin(self, bash: BashBinary, pex_environment: PexEnvironment, name: str) -> VenvScript:
def bin(
self, bash: BashBinary, pex_environment: SandboxPexEnvironment, name: str
) -> VenvScript:
"""Writes a safe shim for an executable or script in the venv's `bin` directory."""
script_path = PurePath(f"{self.pex.name}_bin_{name}_shim.sh")
return self._create_venv_script(
Expand All @@ -520,7 +522,7 @@ def bin(self, bash: BashBinary, pex_environment: PexEnvironment, name: str) -> V
venv_executable=self.venv_dir / "bin" / name,
)

def python(self, bash: BashBinary, pex_environment: PexEnvironment) -> VenvScript:
def python(self, bash: BashBinary, pex_environment: SandboxPexEnvironment) -> VenvScript:
"""Writes a safe shim for the venv's python binary."""
return self.bin(bash, pex_environment, "python")

Expand Down Expand Up @@ -558,7 +560,7 @@ def wrap_venv_prex_request(pex_request: PexRequest) -> VenvPexRequest:

@rule
async def create_venv_pex(
request: VenvPexRequest, bash: BashBinary, pex_environment: PexEnvironment
request: VenvPexRequest, bash: BashBinary, pex_environment: SandboxPexEnvironment
) -> VenvPex:
# VenvPex is motivated by improving performance of Python tools by eliminating traditional PEX
# file startup overhead.
Expand Down Expand Up @@ -707,7 +709,7 @@ def __init__(


@rule
async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment) -> Process:
async def setup_pex_process(request: PexProcess, pex_environment: SandboxPexEnvironment) -> Process:
pex = request.pex
argv = pex_environment.create_argv(pex.name, *request.argv, python=pex.python)
env = {
Expand All @@ -727,7 +729,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=pex_environment.append_only_caches(),
append_only_caches=pex_environment.append_only_caches,
timeout_seconds=request.timeout_seconds,
execution_slot_variable=request.execution_slot_variable,
cache_scope=request.cache_scope,
Expand Down Expand Up @@ -779,7 +781,7 @@ def __init__(

@rule
async def setup_venv_pex_process(
request: VenvPexProcess, pex_environment: PexEnvironment
request: VenvPexProcess, pex_environment: SandboxPexEnvironment
) -> Process:
venv_pex = request.venv_pex
argv = (venv_pex.pex.argv0, *request.argv)
Expand All @@ -796,7 +798,7 @@ async def setup_venv_pex_process(
env=request.extra_env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=pex_environment.append_only_caches(),
append_only_caches=pex_environment.append_only_caches,
timeout_seconds=request.timeout_seconds,
execution_slot_variable=request.execution_slot_variable,
cache_scope=request.cache_scope,
Expand Down
6 changes: 3 additions & 3 deletions src/python/pants/backend/python/util_rules/pex_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from pants.backend.python.subsystems.python_native_code import PythonNativeCode
from pants.backend.python.util_rules import pex_environment
from pants.backend.python.util_rules.pex_environment import (
PexEnvironment,
PexRuntimeEnvironment,
PythonExecutable,
SandboxPexEnvironment,
)
from pants.core.util_rules import external_tool
from pants.core.util_rules.external_tool import (
Expand Down Expand Up @@ -113,7 +113,7 @@ async def download_pex_pex(pex_binary: PexBinary) -> PexPEX:
async def setup_pex_cli_process(
request: PexCliProcess,
pex_binary: PexPEX,
pex_env: PexEnvironment,
pex_env: SandboxPexEnvironment,
python_native_code: PythonNativeCode,
global_options: GlobalOptions,
pex_runtime_env: PexRuntimeEnvironment,
Expand Down Expand Up @@ -179,7 +179,7 @@ async def setup_pex_cli_process(
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=pex_env.append_only_caches(),
append_only_caches=pex_env.append_only_caches,
level=request.level,
cache_scope=request.cache_scope,
)
Expand Down
123 changes: 85 additions & 38 deletions src/python/pants/backend/python/util_rules/pex_environment.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import PurePath
from pathlib import Path, PurePath
from textwrap import dedent
from typing import Mapping, Optional, Tuple, cast

Expand Down Expand Up @@ -106,45 +108,9 @@ class PexEnvironment(EngineAwareReturnType):
path: Tuple[str, ...]
interpreter_search_paths: Tuple[str, ...]
subprocess_environment_dict: FrozenDict[str, str]
named_caches_dir: PurePath
bootstrap_python: Optional[PythonExecutable] = None

def create_argv(
self, pex_filepath: str, *args: str, python: Optional[PythonExecutable] = None
) -> Tuple[str, ...]:
python = python or self.bootstrap_python
if python:
return (python.path, pex_filepath, *args)
if os.path.basename(pex_filepath) == pex_filepath:
return (f"./{pex_filepath}", *args)
return (pex_filepath, *args)

def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]:
"""The environment to use for running anything with PEX.
If the Process is run with a pre-selected Python interpreter, set `python_configured=True`
to avoid PEX from trying to find a new interpreter.
"""
d = dict(
PATH=create_path_env_var(self.path),
PEX_INHERIT_PATH="false",
PEX_IGNORE_RCFILES="true",
PEX_ROOT=str(self.pex_root),
**self.subprocess_environment_dict,
)
# NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been
# pre-selected by Pants. Otherwise, Pex would inadvertently try to find another interpreter
# when running PEXes. (Creating a PEX will ignore this env var in favor of `--python-path`.)
if not python_configured:
d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths)
return d

@property
def pex_root(self) -> PurePath:
return PurePath(".cache/pex_root")

def append_only_caches(self) -> Mapping[str, str]:
return {"pex_root": str(self.pex_root)}

def level(self) -> LogLevel:
return LogLevel.DEBUG if self.bootstrap_python else LogLevel.WARN

Expand Down Expand Up @@ -233,9 +199,90 @@ def first_python_binary() -> Optional[PythonExecutable]:
python_setup.interpreter_search_paths(pex_relevant_environment)
),
subprocess_environment_dict=subprocess_env_vars.vars,
# TODO: This path normalization is duplicated with `engine_initializer.py`. How can we do
# the normalization only once, via the options system?
named_caches_dir=Path(global_options.options.named_caches_dir).resolve(),
bootstrap_python=first_python_binary(),
)


@dataclass(frozen=True)
class CompletePexEnvironment:
_pex_environment: PexEnvironment
pex_root: PurePath

_PEX_ROOT_DIRNAME = "pex_root"

@property
def interpreter_search_paths(self) -> Tuple[str, ...]:
return self._pex_environment.interpreter_search_paths

def create_argv(
self, pex_filepath: str, *args: str, python: Optional[PythonExecutable] = None
) -> Tuple[str, ...]:
python = python or self._pex_environment.bootstrap_python
if python:
return (python.path, pex_filepath, *args)
if os.path.basename(pex_filepath) == pex_filepath:
return (f"./{pex_filepath}", *args)
return (pex_filepath, *args)

def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]:
"""The environment to use for running anything with PEX.
If the Process is run with a pre-selected Python interpreter, set `python_configured=True`
to avoid PEX from trying to find a new interpreter.
"""
d = dict(
PATH=create_path_env_var(self._pex_environment.path),
PEX_INHERIT_PATH="false",
PEX_IGNORE_RCFILES="true",
PEX_ROOT=str(self.pex_root),
**self._pex_environment.subprocess_environment_dict,
)
# NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been
# pre-selected by Pants. Otherwise, Pex would inadvertently try to find another interpreter
# when running PEXes. (Creating a PEX will ignore this env var in favor of `--python-path`.)
if not python_configured:
d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths)
return d


@dataclass(frozen=True)
class SandboxPexEnvironment(CompletePexEnvironment):
append_only_caches: FrozenDict[str, str]

@classmethod
def create(cls, pex_environment) -> SandboxPexEnvironment:
pex_root = PurePath(".cache") / cls._PEX_ROOT_DIRNAME
return cls(
_pex_environment=pex_environment,
pex_root=pex_root,
append_only_caches=FrozenDict({cls._PEX_ROOT_DIRNAME: str(pex_root)}),
)


class WorkspacePexEnvironment(CompletePexEnvironment):
@classmethod
def create(cls, pex_environment: PexEnvironment) -> WorkspacePexEnvironment:
# N.B.: When running in the workspace the engine doesn't offer an append_only_caches
# service to setup a symlink to our named cache for us. As such, we point the PEX_ROOT
# directly at the underlying append only cache in that case to re-use results there and
# to keep the workspace from being dirtied by the creation of a new Pex cache rooted
# there.
pex_root = pex_environment.named_caches_dir / cls._PEX_ROOT_DIRNAME
return cls(_pex_environment=pex_environment, pex_root=pex_root)


@rule
def sandbox_pex_environment(pex_environment: PexEnvironment) -> SandboxPexEnvironment:
return SandboxPexEnvironment.create(pex_environment)


@rule
def workspace_pex_environment(pex_environment: PexEnvironment) -> WorkspacePexEnvironment:
return WorkspacePexEnvironment.create(pex_environment)


def rules():
return [*collect_rules(), *process.rules(), *subprocess_environment.rules()]

0 comments on commit 0a01eaf

Please sign in to comment.