Skip to content

Commit

Permalink
Speed up run to no longer rebuild a Pex on source file changes (#10410
Browse files Browse the repository at this point in the history
)

Whereas `binary` must include source files in the PEX, `run` does not need to. We get less cache invalidation and generally faster performance by instead having the chroot simply be populated with the source files, similar to how we implement Pytest.

We still use a Pex to handle the `entry_point` field and to resolve all 3rd party requirements.

Before, with a whitespace change:
```
▶ /usr/bin/time ./pants run build-support/bin/generate_travis_yml.py > .travis.yml
        2.72 real         0.73 user         0.21 sys
```

After, with a whitespace change:
```
▶ /usr/bin/time ./pants run build-support/bin/generate_travis_yml.py > .travis.yml
        1.87 real         0.73 user         0.21 sys
```

Implements half of #10406.
  • Loading branch information
Eric-Arellano authored Jul 21, 2020
1 parent 2eb679a commit 64bfcbb
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 15 deletions.
6 changes: 4 additions & 2 deletions src/python/pants/backend/python/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
from pants.backend.python.python_requirements import PythonRequirements
from pants.backend.python.rules import (
coverage,
create_python_binary,
download_pex_bin,
inject_ancestor_files,
inject_init,
pex,
pex_from_targets,
pytest_runner,
python_create_binary,
python_sources,
repl,
run_python_binary,
run_setup_py,
)
from pants.backend.python.subsystems import python_native_code, subprocess_environment
Expand Down Expand Up @@ -62,9 +63,10 @@ def rules():
*pex.rules(),
*pex_from_targets.rules(),
*pytest_runner.rules(),
*python_create_binary.rules(),
*create_python_binary.rules(),
*python_native_code.rules(),
*repl.rules(),
*run_python_binary.rules(),
*run_setup_py.rules(),
*subprocess_environment.rules(),
)
Expand Down
72 changes: 72 additions & 0 deletions src/python/pants/backend/python/rules/run_python_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pants.backend.python.rules.create_python_binary import PythonBinaryFieldSet
from pants.backend.python.rules.pex import Pex, PexPlatforms
from pants.backend.python.rules.pex_from_targets import PexFromTargetsRequest
from pants.backend.python.rules.python_sources import (
UnstrippedPythonSources,
UnstrippedPythonSourcesRequest,
)
from pants.backend.python.target_types import PythonBinaryDefaults, PythonBinarySources
from pants.core.goals.binary import BinaryFieldSet
from pants.core.goals.run import RunRequest
from pants.core.util_rules.determine_source_files import AllSourceFilesRequest, SourceFiles
from pants.engine.addresses import Addresses
from pants.engine.fs import Digest, MergeDigests
from pants.engine.rules import SubsystemRule, rule
from pants.engine.selectors import Get, MultiGet
from pants.engine.target import InvalidFieldException, TransitiveTargets
from pants.engine.unions import UnionRule


@rule
async def run_python_binary(
field_set: PythonBinaryFieldSet, python_binary_defaults: PythonBinaryDefaults
) -> RunRequest:
entry_point = field_set.entry_point.value
if entry_point is None:
binary_sources = await Get(
SourceFiles, AllSourceFilesRequest([field_set.sources], strip_source_roots=True)
)
entry_point = PythonBinarySources.translate_source_file_to_entry_point(binary_sources.files)
if entry_point is None:
raise InvalidFieldException(
"You must either specify `sources` or `entry_point` for the `python_binary` target "
f"{repr(field_set.address)} in order to run it, but both fields were undefined."
)

transitive_targets = await Get(TransitiveTargets, Addresses([field_set.address]))

output_filename = f"{field_set.address.target_name}.pex"
pex_request = Get(
Pex,
PexFromTargetsRequest(
addresses=Addresses([field_set.address]),
platforms=PexPlatforms.create_from_platforms_field(field_set.platforms),
output_filename=output_filename,
additional_args=field_set.generate_additional_args(python_binary_defaults),
include_source_files=False,
),
)
source_files_request = Get(
UnstrippedPythonSources,
UnstrippedPythonSourcesRequest(transitive_targets.closure, include_files=True),
)
pex, source_files = await MultiGet(pex_request, source_files_request)

merged_digest = await Get(Digest, MergeDigests([pex.digest, source_files.snapshot.digest]))
return RunRequest(
digest=merged_digest,
binary_name=pex.output_filename,
extra_args=("-m", entry_point),
env={"PEX_EXTRA_SYS_PATH": ":".join(source_files.source_roots)},
)


def rules():
return [
run_python_binary,
UnionRule(BinaryFieldSet, PythonBinaryFieldSet),
SubsystemRule(PythonBinaryDefaults),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pathlib import Path
from textwrap import dedent

from pants.base.build_environment import get_buildroot
from pants.testutil.pants_run_integration_test import PantsRunIntegrationTest
from pants.util.contextutil import temporary_dir


class RunPythonBinaryIntegrationTest(PantsRunIntegrationTest):
def test_sample_script(self) -> None:
"""Test that we properly run a `python_binary` target.
This checks a few things:
- We can handle source roots.
- We properly load third party requirements.
- We propagate the error code.
"""
with temporary_dir(root_dir=get_buildroot()) as tmpdir:
tmpdir_relative = Path(tmpdir).relative_to(get_buildroot())

src_root1 = Path(tmpdir, "src_root1/project")
src_root1.mkdir(parents=True)
(src_root1 / "app.py").write_text(
dedent(
"""\
import sys
from utils.strutil import upper_case
if __name__ == "__main__":
print(upper_case("Hello world."))
print("Hola, mundo.", file=sys.stderr)
sys.exit(23)
"""
)
)
(src_root1 / "BUILD").write_text("python_binary(sources=['app.py'])")

src_root2 = Path(tmpdir, "src_root2/utils")
src_root2.mkdir(parents=True)
(src_root2 / "strutil.py").write_text(
dedent(
"""\
def upper_case(s):
return s.upper()
"""
)
)
(src_root2 / "BUILD").write_text("python_library()")
result = self.run_pants(
[
"--dependency-inference",
(
f"--source-root-patterns=['/{tmpdir_relative}/src_root1', "
f"'/{tmpdir_relative}/src_root2']"
),
"--pants-ignore=__pycache__",
"run",
f"{tmpdir_relative}/src_root1/project/app.py",
]
)

assert result.returncode == 23
assert result.stdout_data == "HELLO WORLD.\n"
assert "Hola, mundo.\n" in result.stderr_data
42 changes: 36 additions & 6 deletions src/python/pants/core/goals/run.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from dataclasses import dataclass
from pathlib import PurePath
from typing import Iterable, Mapping, Optional, Tuple

from pants.base.build_root import BuildRoot
from pants.core.goals.binary import BinaryFieldSet, CreatedBinary
from pants.core.goals.binary import BinaryFieldSet
from pants.engine.console import Console
from pants.engine.fs import DirectoryToMaterialize, Workspace
from pants.engine.fs import Digest, DirectoryToMaterialize, Workspace
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.interactive_process import InteractiveProcess, InteractiveRunner
from pants.engine.rules import goal_rule
Expand All @@ -15,6 +17,30 @@
from pants.option.custom_types import shell_str
from pants.option.global_options import GlobalOptions
from pants.util.contextutil import temporary_dir
from pants.util.frozendict import FrozenDict
from pants.util.meta import frozen_after_init


@frozen_after_init
@dataclass(unsafe_hash=True)
class RunRequest:
digest: Digest
binary_name: str
extra_args: Tuple[str, ...]
env: FrozenDict[str, str]

def __init__(
self,
*,
digest: Digest,
binary_name: str,
extra_args: Optional[Iterable[str]] = None,
env: Optional[Mapping[str, str]] = None,
) -> None:
self.digest = digest
self.binary_name = binary_name
self.extra_args = tuple(extra_args or ())
self.env = FrozenDict(env or {})


class RunOptions(GoalSubsystem):
Expand Down Expand Up @@ -65,17 +91,21 @@ async def run(
),
)
field_set = targets_to_valid_field_sets.field_sets[0]
binary = await Get(CreatedBinary, BinaryFieldSet, field_set)
request = await Get(RunRequest, BinaryFieldSet, field_set)

workdir = global_options.options.pants_workdir
with temporary_dir(root_dir=workdir, cleanup=True) as tmpdir:
path_relative_to_build_root = PurePath(tmpdir).relative_to(build_root.path).as_posix()
workspace.materialize_directory(
DirectoryToMaterialize(binary.digest, path_prefix=path_relative_to_build_root)
DirectoryToMaterialize(request.digest, path_prefix=path_relative_to_build_root)
)

full_path = PurePath(tmpdir, binary.binary_name).as_posix()
process = InteractiveProcess(argv=(full_path, *options.values.args), run_in_workspace=True)
full_path = PurePath(tmpdir, request.binary_name).as_posix()
process = InteractiveProcess(
argv=(full_path, *request.extra_args, *options.values.args),
env=request.env,
run_in_workspace=True,
)
try:
result = interactive_runner.run_process(process)
exit_code = result.exit_code
Expand Down
14 changes: 7 additions & 7 deletions src/python/pants/core/goals/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from pants.base.build_root import BuildRoot
from pants.base.specs import SingleAddress
from pants.core.goals.binary import BinaryFieldSet, CreatedBinary
from pants.core.goals.run import Run, RunOptions, run
from pants.core.goals.binary import BinaryFieldSet
from pants.core.goals.run import Run, RunOptions, RunRequest, run
from pants.engine.addresses import Address
from pants.engine.fs import CreateDigest, Digest, FileContent, Workspace
from pants.engine.interactive_process import InteractiveProcess, InteractiveRunner
Expand All @@ -28,14 +28,14 @@


class RunTest(TestBase):
def create_mock_binary(self, program_text: bytes) -> CreatedBinary:
def create_mock_run_request(self, program_text: bytes) -> RunRequest:
digest = self.request_single_product(
Digest,
CreateDigest(
[FileContent(path="program.py", content=program_text, is_executable=True)]
),
)
return CreatedBinary(binary_name="program.py", digest=digest)
return RunRequest(digest=digest, binary_name="program.py")

def single_target_run(
self, *, console: MockConsole, program_text: bytes, address_spec: str,
Expand Down Expand Up @@ -74,9 +74,9 @@ class TestBinaryTarget(Target):
mock=lambda _: TargetsToValidFieldSets({target_with_origin: [field_set]}),
),
MockGet(
product_type=CreatedBinary,
product_type=RunRequest,
subject_type=TestBinaryFieldSet,
mock=lambda _: self.create_mock_binary(program_text),
mock=lambda _: self.create_mock_run_request(program_text),
),
],
)
Expand All @@ -92,7 +92,7 @@ def test_normal_run(self) -> None:

def test_materialize_input_files(self) -> None:
program_text = b'#!/usr/bin/python\nprint("hello")'
binary = self.create_mock_binary(program_text)
binary = self.create_mock_run_request(program_text)
interactive_runner = InteractiveRunner(self.scheduler)
process = InteractiveProcess(
argv=("./program.py",), run_in_workspace=False, input_digest=binary.digest,
Expand Down

0 comments on commit 64bfcbb

Please sign in to comment.