Skip to content

Commit

Permalink
Add "workspace invalidation" sources support for shell / adhoc backen…
Browse files Browse the repository at this point in the history
…ds (pantsbuild#21051)

Add support for "workspace invalidation" sources for the `adhoc_tool`
and `shell_command` target types. This allows those targets to
depend on the content of files in the repository without materializing
those sources in the execution sandbox. This support is intended to be
used in conjunction with the workspace environment where execution does
not take place in a sandbox.

The new field `workspace_invalidation_sources` on both target types is a
list of globs into the repository. The digest of the referenced files
will be inserted as an environment variable in the process executed
(which makes it part of the process's cache key).
  • Loading branch information
tdyas committed Jun 17, 2024
1 parent 8114a0b commit b2db430
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 8 deletions.
2 changes: 2 additions & 0 deletions docs/docs/using-pants/environments.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ Thus, Pants puts that burden on you, the Pants user, to ensure a process output

If a process isn't reproducible, re-running a build from the same source code could fail unexpectedly, or give different output to an earlier build.

You should use the `workspace_invalidation_sources` field available on the `adhoc_tool` and `shell_command` target types to inform Pants of what files should cause re-execution of the target's process if they change.

:::

The special environment name `__local_workspace__` can be used to select a matching `experimental_workspace_environment` based on its `compatible_platforms` attribute.
Expand Down
18 changes: 17 additions & 1 deletion src/python/pants/backend/adhoc/adhoc_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import logging
import os

from pants.backend.adhoc.target_types import (
AdhocToolArgumentsField,
Expand All @@ -19,7 +20,9 @@
AdhocToolStderrFilenameField,
AdhocToolStdoutFilenameField,
AdhocToolWorkdirField,
AdhocToolWorkspaceInvalidationSourcesField,
)
from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior
from pants.core.target_types import FileSourceField
from pants.core.util_rules.adhoc_process_support import (
AdhocProcessRequest,
Expand All @@ -30,7 +33,7 @@
from pants.core.util_rules.adhoc_process_support import rules as adhoc_process_support_rules
from pants.core.util_rules.environments import EnvironmentNameRequest, EnvironmentTarget
from pants.engine.environment import EnvironmentName
from pants.engine.fs import Digest, Snapshot
from pants.engine.fs import Digest, PathGlobs, Snapshot
from pants.engine.rules import Get, collect_rules, rule
from pants.engine.target import GeneratedSources, GenerateSourcesRequest
from pants.engine.unions import UnionRule
Expand Down Expand Up @@ -81,6 +84,18 @@ async def run_in_sandbox_request(

cache_scope = env_target.default_cache_scope

workspace_invalidation_globs: PathGlobs | None = None
workspace_invalidation_sources = (
target.get(AdhocToolWorkspaceInvalidationSourcesField).value or ()
)
if workspace_invalidation_sources:
spec_path = target.address.spec_path
workspace_invalidation_globs = PathGlobs(
globs=(os.path.join(spec_path, glob) for glob in workspace_invalidation_sources),
glob_match_error_behavior=GlobMatchErrorBehavior.error,
description_of_origin=f"`{AdhocToolWorkspaceInvalidationSourcesField.alias}` for `adhoc_tool` target at `{target.address}`",
)

process_request = AdhocProcessRequest(
description=description,
address=target.address,
Expand All @@ -99,6 +114,7 @@ async def run_in_sandbox_request(
log_output=target[AdhocToolLogOutputField].value,
capture_stderr_file=target[AdhocToolStderrFilenameField].value,
capture_stdout_file=target[AdhocToolStdoutFilenameField].value,
workspace_invalidation_globs=workspace_invalidation_globs,
cache_scope=cache_scope,
)

Expand Down
46 changes: 43 additions & 3 deletions src/python/pants/backend/adhoc/adhoc_tool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,27 @@ def rule_runner() -> PythonRuleRunner:
PythonSourceTarget,
LocalWorkspaceEnvironmentTarget,
],
isolated_local_store=True,
)
rule_runner.set_options([], env_inherit={"PATH"})
return rule_runner


def execute_adhoc_tool(
    rule_runner: PythonRuleRunner,
    address: Address,
) -> GeneratedSources:
    """Generate sources for the `adhoc_tool` target at `address` and return the result.

    The target is looked up through `rule_runner`, wrapped in a
    `GenerateFilesFromAdhocToolRequest` (with an empty protocol snapshot), and
    submitted as a `GeneratedSources` request.
    """
    request = GenerateFilesFromAdhocToolRequest(EMPTY_SNAPSHOT, rule_runner.get_target(address))
    return rule_runner.request(GeneratedSources, [request])


def assert_adhoc_tool_result(
rule_runner: PythonRuleRunner,
address: Address,
expected_contents: dict[str, str],
) -> None:
generator_type: type[GenerateSourcesRequest] = GenerateFilesFromAdhocToolRequest
target = rule_runner.get_target(address)
result = rule_runner.request(GeneratedSources, [generator_type(EMPTY_SNAPSHOT, target)])
result = execute_adhoc_tool(rule_runner, address)
assert result.snapshot.files == tuple(expected_contents)
contents = rule_runner.request(DigestContents, [result.snapshot.digest])
for fc in contents:
Expand Down Expand Up @@ -334,3 +342,35 @@ def test_adhoc_tool_with_workspace_execution(rule_runner: PythonRuleRunner) -> N
workspace_output_path = Path(rule_runner.build_root).joinpath("foo.txt")
assert workspace_output_path.exists()
assert workspace_output_path.read_text().strip() == "workspace"


def test_adhoc_tool_workspace_invalidation_sources(rule_runner: PythonRuleRunner) -> None:
    """Editing a file matched by `workspace_invalidation_sources` must re-run the tool."""
    build_file = dedent(
        """\
        system_binary(name="bash", binary_name="bash")
        adhoc_tool(
            name="cmd",
            runnable=":bash",
            # Use a random value so we can detect when re-execution occurs.
            args=["-c", "echo $RANDOM > out.log"],
            output_files=["out.log"],
            workspace_invalidation_sources=['a-file'],
        )
        """
    )
    rule_runner.write_files({"src/BUILD": build_file, "src/a-file": ""})
    cmd_address = Address("src", target_name="cmd")

    # With `a-file` untouched, a second run should be served from cache, so both
    # runs report the same (random) output snapshot.
    first_run = execute_adhoc_tool(rule_runner, cmd_address)
    cached_run = execute_adhoc_tool(rule_runner, cmd_address)
    assert first_run.snapshot == cached_run.snapshot

    # Change the content of the hash-only source file; the tool is expected to
    # execute again and therefore emit a different random value.
    invalidation_file = Path(rule_runner.build_root, "src", "a-file")
    invalidation_file.write_text("xyzzy")
    rerun = execute_adhoc_tool(rule_runner, cmd_address)
    assert first_run.snapshot != rerun.snapshot
22 changes: 22 additions & 0 deletions src/python/pants/backend/adhoc/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,27 @@ class AdhocToolOutputRootDirField(StringField):
)


class AdhocToolWorkspaceInvalidationSourcesField(StringSequenceField):
    # A list of path globs whose matched files only influence cache invalidation
    # of the target's process; the help text below is what users see for the field.
    alias: ClassVar[str] = "workspace_invalidation_sources"
    help = help_text(
        """
        Path globs for source files on which this target depends and for which any changes should cause
        this target's process to be re-executed. Unlike ordinary dependencies, the files referenced by
        `workspace_invalidation_sources` globs are not materialized into any execution sandbox
        and are referenced solely for cache invalidation purposes.
        Note: This field is intended to work with the in-workspace execution environment configured by
        the `experimental_workspace_environment` target type. It should only be used when the configured
        environment for a target is an `experimental_workspace_environment`.
        Implementation: Pants computes a digest of all of the files referenced by the provided globs
        and injects that digest into the process as an environment variable. Since environment variables
        are part of the cache key for a process's execution, any changes to the referenced files will
        change the digest and thus force re-execution of the process.
        """
    )


class AdhocToolTarget(Target):
alias: ClassVar[str] = "adhoc_tool"
core_fields = (
Expand All @@ -272,6 +293,7 @@ class AdhocToolTarget(Target):
AdhocToolOutputRootDirField,
AdhocToolStdoutFilenameField,
AdhocToolStderrFilenameField,
AdhocToolWorkspaceInvalidationSourcesField,
EnvironmentField,
)
help = help_text(
Expand Down
6 changes: 6 additions & 0 deletions src/python/pants/backend/shell/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
AdhocToolRunnableDependenciesField,
AdhocToolTimeoutField,
AdhocToolWorkdirField,
AdhocToolWorkspaceInvalidationSourcesField,
)
from pants.backend.shell.subsystems.shell_setup import ShellSetup
from pants.core.goals.test import RuntimePackageDependenciesField, TestTimeoutField
Expand Down Expand Up @@ -379,6 +380,10 @@ class ShellCommandNamedCachesField(AdhocToolNamedCachesField):
pass


class ShellCommandWorkspaceInvalidationSourcesField(AdhocToolWorkspaceInvalidationSourcesField):
    # `shell_command` counterpart of the `adhoc_tool` field: it adds nothing of its
    # own and inherits the alias and help text unchanged from the parent class.
    pass


class SkipShellCommandTestsField(BoolField):
alias = "skip_tests"
default = False
Expand All @@ -403,6 +408,7 @@ class ShellCommandTarget(Target):
ShellCommandWorkdirField,
ShellCommandNamedCachesField,
ShellCommandOutputRootDirField,
ShellCommandWorkspaceInvalidationSourcesField,
EnvironmentField,
)
help = help_text(
Expand Down
18 changes: 17 additions & 1 deletion src/python/pants/backend/shell/util_rules/shell_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import logging
import os
import shlex
from dataclasses import dataclass

Expand All @@ -24,8 +25,10 @@
ShellCommandTimeoutField,
ShellCommandToolsField,
ShellCommandWorkdirField,
ShellCommandWorkspaceInvalidationSourcesField,
)
from pants.backend.shell.util_rules.builtin import BASH_BUILTIN_COMMANDS
from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior
from pants.core.goals.run import RunFieldSet, RunInSandboxBehavior, RunRequest
from pants.core.target_types import FileSourceField
from pants.core.util_rules.adhoc_process_support import (
Expand All @@ -41,7 +44,7 @@
from pants.core.util_rules.environments import EnvironmentNameRequest, EnvironmentTarget
from pants.core.util_rules.system_binaries import BashBinary, BinaryShims, BinaryShimsRequest
from pants.engine.environment import EnvironmentName
from pants.engine.fs import Digest, Snapshot
from pants.engine.fs import Digest, PathGlobs, Snapshot
from pants.engine.internals.native_engine import EMPTY_DIGEST
from pants.engine.process import Process
from pants.engine.rules import Get, collect_rules, rule
Expand Down Expand Up @@ -149,6 +152,18 @@ async def _prepare_process_request_from_target(

cache_scope = env_target.default_cache_scope

workspace_invalidation_globs: PathGlobs | None = None
workspace_invalidation_sources = (
shell_command.get(ShellCommandWorkspaceInvalidationSourcesField).value or ()
)
if workspace_invalidation_sources:
spec_path = shell_command.address.spec_path
workspace_invalidation_globs = PathGlobs(
globs=(os.path.join(spec_path, glob) for glob in workspace_invalidation_sources),
glob_match_error_behavior=GlobMatchErrorBehavior.error,
description_of_origin=f"`{ShellCommandWorkspaceInvalidationSourcesField.alias}` for `shell_command` target at `{shell_command.address}`",
)

return AdhocProcessRequest(
description=description,
address=shell_command.address,
Expand All @@ -167,6 +182,7 @@ async def _prepare_process_request_from_target(
log_output=shell_command[ShellCommandLogOutputField].value,
capture_stdout_file=None,
capture_stderr_file=None,
workspace_invalidation_globs=workspace_invalidation_globs,
cache_scope=cache_scope,
)

Expand Down
46 changes: 43 additions & 3 deletions src/python/pants/backend/shell/util_rules/shell_command_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import shlex
import time
from pathlib import Path
from textwrap import dedent

Expand Down Expand Up @@ -69,19 +70,27 @@ def rule_runner() -> RuleRunner:
FilesGeneratorTarget,
LocalWorkspaceEnvironmentTarget,
],
isolated_local_store=True,
)
rule_runner.set_options([], env_inherit={"PATH"})
return rule_runner


def execute_shell_command(
    rule_runner: RuleRunner,
    address: Address,
) -> GeneratedSources:
    """Generate sources for the `shell_command` target at `address` and return the result.

    The target is looked up through `rule_runner`, wrapped in a
    `GenerateFilesFromShellCommandRequest` (with an empty protocol snapshot), and
    submitted as a `GeneratedSources` request.
    """
    request = GenerateFilesFromShellCommandRequest(EMPTY_SNAPSHOT, rule_runner.get_target(address))
    return rule_runner.request(GeneratedSources, [request])


def assert_shell_command_result(
rule_runner: RuleRunner,
address: Address,
expected_contents: dict[str, str],
) -> None:
generator_type: type[GenerateSourcesRequest] = GenerateFilesFromShellCommandRequest
target = rule_runner.get_target(address)
result = rule_runner.request(GeneratedSources, [generator_type(EMPTY_SNAPSHOT, target)])
result = execute_shell_command(rule_runner, address)
assert result.snapshot.files == tuple(expected_contents)
contents = rule_runner.request(DigestContents, [result.snapshot.digest])
for fc in contents:
Expand Down Expand Up @@ -871,3 +880,34 @@ def test_shell_command_with_workspace_execution(rule_runner: RuleRunner) -> None
workspace_output_path = Path(rule_runner.build_root).joinpath("foo.txt")
assert workspace_output_path.exists()
assert workspace_output_path.read_text().strip() == "workspace"


def test_shell_command_workspace_invalidation_sources(rule_runner: RuleRunner) -> None:
    """Editing a file matched by `workspace_invalidation_sources` must re-run the command."""
    build_file = dedent(
        """\
        shell_command(
            name="cmd",
            # Use a random value so we can detect when re-execution occurs.
            command='echo $RANDOM > out.log',
            output_files=["out.log"],
            workspace_invalidation_sources=['a-file'],
        )
        """
    )
    rule_runner.write_files({"src/BUILD": build_file, "src/a-file": ""})
    cmd_address = Address("src", target_name="cmd")

    # With `a-file` untouched, a second run should be served from cache, so both
    # runs report the same (random) output snapshot.
    first_run = execute_shell_command(rule_runner, cmd_address)
    cached_run = execute_shell_command(rule_runner, cmd_address)
    assert first_run.snapshot == cached_run.snapshot

    # Change the content of the hash-only source file; the command is expected to
    # execute again and therefore emit a different random value.
    invalidation_file = Path(rule_runner.build_root, "src", "a-file")
    invalidation_file.write_text("xyzzy")
    # Brief pause after the write and before re-running, kept exactly as in the
    # original test. NOTE(review): presumably gives file-change detection time to
    # register the edit — confirm.
    time.sleep(0.100)
    rerun = execute_shell_command(rule_runner, cmd_address)
    assert first_run.snapshot != rerun.snapshot
11 changes: 11 additions & 0 deletions src/python/pants/core/util_rules/adhoc_process_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Directory,
FileContent,
MergeDigests,
PathGlobs,
Snapshot,
)
from pants.engine.internals.native_engine import AddressInput, RemovePrefix
Expand Down Expand Up @@ -71,6 +72,7 @@ class AdhocProcessRequest:
log_output: bool
capture_stdout_file: str | None
capture_stderr_file: str | None
workspace_invalidation_globs: PathGlobs | None
cache_scope: ProcessCacheScope | None = None


Expand Down Expand Up @@ -567,6 +569,15 @@ async def prepare_adhoc_process(
if supplied_env_vars:
command_env.update(supplied_env_vars)

# Compute the digest for any workspace invalidation sources and put the digest into the environment as a dummy variable
# so that the process produced by this rule will be invalidated if any of the referenced files change.
if request.workspace_invalidation_globs is not None:
workspace_invalidation_digest = await Get(
Digest, PathGlobs, request.workspace_invalidation_globs
)
digest_str = f"{workspace_invalidation_digest.fingerprint}-{workspace_invalidation_digest.serialized_bytes_length}"
command_env["__PANTS_WORKSPACE_INVALIDATION_SOURCES_DIGEST"] = digest_str

input_snapshot = await Get(Snapshot, Digest, request.input_digest)

if not working_directory or working_directory in input_snapshot.dirs:
Expand Down

0 comments on commit b2db430

Please sign in to comment.