Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of Owners rule #16563

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/python/pants/backend/java/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pants.engine.rules import collect_rules, rule
from pants.engine.target import Target
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel


Expand All @@ -36,10 +36,8 @@ class PutativeJavaTargetsRequest(PutativeTargetsRequest):

def classify_source_files(paths: Iterable[str]) -> dict[type[Target], set[str]]:
"""Returns a dict of target type -> files that belong to targets of that type."""
tests_filespec = Filespec(includes=list(JavaTestsGeneratorSourcesField.default))
test_filenames = set(
matches_filespec(tests_filespec, paths=[os.path.basename(path) for path in paths])
)
tests_filespec_matcher = FilespecMatcher(JavaTestsGeneratorSourcesField.default, ())
test_filenames = set(tests_filespec_matcher.matches([os.path.basename(path) for path in paths]))
test_files = {path for path in paths if os.path.basename(path) in test_filenames}
sources_files = set(paths) - test_files
return {JunitTestsGeneratorTarget: test_files, JavaSourcesGeneratorTarget: sources_files}
Expand Down
6 changes: 3 additions & 3 deletions src/python/pants/backend/kotlin/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pants.engine.rules import collect_rules, rule
from pants.engine.target import Target
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel


Expand All @@ -36,12 +36,12 @@ class PutativeKotlinTargetsRequest(PutativeTargetsRequest):

def classify_source_files(paths: Iterable[str]) -> dict[type[Target], set[str]]:
"""Returns a dict of target type -> files that belong to targets of that type."""
junit_filespec = Filespec(includes=list(KotlinJunitTestsGeneratorSourcesField.default))
junit_filespec_matcher = FilespecMatcher(KotlinJunitTestsGeneratorSourcesField.default, ())
junit_files = {
path
for path in paths
if os.path.basename(path)
in set(matches_filespec(junit_filespec, paths=[os.path.basename(path) for path in paths]))
in set(junit_filespec_matcher.matches([os.path.basename(path) for path in paths]))
}
sources_files = set(paths) - junit_files
return {
Expand Down
10 changes: 5 additions & 5 deletions src/python/pants/backend/python/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pants.engine.rules import collect_rules, rule, rule_helper
from pants.engine.target import Target, UnexpandedTargets
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.source.source_root import SourceRootsRequest, SourceRootsResult
from pants.util.logging import LogLevel

Expand All @@ -53,13 +53,13 @@ class PutativePythonTargetsRequest(PutativeTargetsRequest):

def classify_source_files(paths: Iterable[str]) -> dict[type[Target], set[str]]:
"""Returns a dict of target type -> files that belong to targets of that type."""
tests_filespec = Filespec(includes=list(PythonTestsGeneratingSourcesField.default))
test_utils_filespec = Filespec(includes=list(PythonTestUtilsGeneratingSourcesField.default))
tests_filespec_matcher = FilespecMatcher(PythonTestsGeneratingSourcesField.default, ())
test_utils_filespec_matcher = FilespecMatcher(PythonTestUtilsGeneratingSourcesField.default, ())

path_to_file_name = {path: os.path.basename(path) for path in paths}
test_file_names = set(matches_filespec(tests_filespec, paths=path_to_file_name.values()))
test_file_names = set(tests_filespec_matcher.matches(list(path_to_file_name.values())))
test_util_file_names = set(
matches_filespec(test_utils_filespec, paths=path_to_file_name.values())
test_utils_filespec_matcher.matches(list(path_to_file_name.values()))
)

test_files = {
Expand Down
12 changes: 5 additions & 7 deletions src/python/pants/backend/scala/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pants.engine.rules import collect_rules, rule
from pants.engine.target import Target
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel


Expand All @@ -38,21 +38,19 @@ class PutativeScalaTargetsRequest(PutativeTargetsRequest):

def classify_source_files(paths: Iterable[str]) -> dict[type[Target], set[str]]:
"""Returns a dict of target type -> files that belong to targets of that type."""
scalatest_filespec = Filespec(includes=list(ScalatestTestsGeneratorSourcesField.default))
junit_filespec = Filespec(includes=list(ScalaJunitTestsGeneratorSourcesField.default))
scalatest_filespec_matcher = FilespecMatcher(ScalatestTestsGeneratorSourcesField.default, ())
junit_filespec_matcher = FilespecMatcher(ScalaJunitTestsGeneratorSourcesField.default, ())
scalatest_files = {
path
for path in paths
if os.path.basename(path)
in set(
matches_filespec(scalatest_filespec, paths=[os.path.basename(path) for path in paths])
)
in set(scalatest_filespec_matcher.matches([os.path.basename(path) for path in paths]))
}
junit_files = {
path
for path in paths
if os.path.basename(path)
in set(matches_filespec(junit_filespec, paths=[os.path.basename(path) for path in paths]))
in set(junit_filespec_matcher.matches([os.path.basename(path) for path in paths]))
}
sources_files = set(paths) - scalatest_files - junit_files
return {
Expand Down
8 changes: 3 additions & 5 deletions src/python/pants/backend/shell/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pants.engine.rules import collect_rules, rule
from pants.engine.target import Target
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel


Expand All @@ -36,10 +36,8 @@ class PutativeShellTargetsRequest(PutativeTargetsRequest):

def classify_source_files(paths: Iterable[str]) -> dict[type[Target], set[str]]:
"""Returns a dict of target type -> files that belong to targets of that type."""
tests_filespec = Filespec(includes=list(Shunit2TestsGeneratorSourcesField.default))
test_filenames = set(
matches_filespec(tests_filespec, paths=[os.path.basename(path) for path in paths])
)
tests_filespec_matcher = FilespecMatcher(Shunit2TestsGeneratorSourcesField.default, ())
test_filenames = set(tests_filespec_matcher.matches([os.path.basename(path) for path in paths]))
test_files = {path for path in paths if os.path.basename(path) in test_filenames}
sources_files = set(paths) - test_files
return {Shunit2TestsGeneratorTarget: test_files, ShellSourcesGeneratorTarget: sources_files}
Expand Down
17 changes: 9 additions & 8 deletions src/python/pants/core/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from pants.engine.unions import UnionMembership, union
from pants.option.global_options import GlobalOptions
from pants.option.option_types import BoolOption, DictOption, StrListOption, StrOption
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.util.docutil import bin_name, doc_url
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
Expand Down Expand Up @@ -362,8 +362,8 @@ def alias_for(self, standard_type: str) -> str | None:
def validate_build_file_name(self, build_file_patterns: tuple[str, ...]) -> None:
"""Check that the specified BUILD file name works with the repository's BUILD file
patterns."""
filespec = Filespec(includes=list(build_file_patterns))
if not bool(matches_filespec(filespec, paths=[self.build_file_name])):
filespec_matcher = FilespecMatcher(build_file_patterns, ())
if not bool(filespec_matcher.matches([self.build_file_name])):
raise ValueError(
f"The option `[{self.options_scope}].build_file_name` is set to "
f"`{self.build_file_name}`, which is not compatible with "
Expand All @@ -375,13 +375,14 @@ def validate_build_file_name(self, build_file_patterns: tuple[str, ...]) -> None
def filter_by_ignores(
self, putative_targets: Iterable[PutativeTarget], build_file_ignores: tuple[str, ...]
) -> Iterator[PutativeTarget]:
ignore_paths_filespec = Filespec(includes=[*self.ignore_paths, *build_file_ignores])
ignore_paths_filespec_matcher = FilespecMatcher(
(*self.ignore_paths, *build_file_ignores), ()
)
for ptgt in putative_targets:
is_ignored_file = bool(
matches_filespec(
ignore_paths_filespec,
paths=[os.path.join(ptgt.path, self.build_file_name)],
)
ignore_paths_filespec_matcher.matches(
[os.path.join(ptgt.path, self.build_file_name)]
),
)
if is_ignored_file:
continue
Expand Down
8 changes: 3 additions & 5 deletions src/python/pants/core/goals/tailor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from pants.engine.rules import Get, QueryRule, rule
from pants.engine.target import MultipleSourcesField, Target
from pants.engine.unions import UnionRule
from pants.source.filespec import Filespec, matches_filespec
from pants.source.filespec import FilespecMatcher
from pants.testutil.option_util import create_goal_subsystem
from pants.testutil.pytest_util import no_exception
from pants.testutil.rule_runner import RuleRunner
Expand Down Expand Up @@ -79,11 +79,9 @@ async def find_fortran_targets(
all_fortran_files = await Get(Paths, PathGlobs, req.path_globs("*.f90"))
unowned_shell_files = set(all_fortran_files.files) - set(all_owned_sources)

tests_filespec = Filespec(includes=list(FortranTestsSources.default))
tests_filespec_matcher = FilespecMatcher(FortranTestsSources.default, ())
test_filenames = set(
matches_filespec(
tests_filespec, paths=[os.path.basename(path) for path in unowned_shell_files]
)
tests_filespec_matcher.matches([os.path.basename(path) for path in unowned_shell_files])
)
test_files = {path for path in unowned_shell_files if os.path.basename(path) in test_filenames}
sources_files = set(unowned_shell_files) - test_files
Expand Down
5 changes: 2 additions & 3 deletions src/python/pants/engine/internals/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@
OwnersNotFoundBehavior,
UnmatchedBuildFileGlobs,
)
from pants.source.filespec import matches_filespec
from pants.util.docutil import bin_name, doc_url
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
Expand Down Expand Up @@ -786,7 +785,7 @@ def create_live_and_deleted_gets(

for candidate_tgt, bfa in zip(candidate_tgts, build_file_addresses):
matching_files = set(
matches_filespec(candidate_tgt.get(SourcesField).filespec, paths=sources_set)
candidate_tgt.get(SourcesField).filespec_matcher.matches(list(sources_set))
)
# Also consider secondary ownership, meaning it's not a `SourcesField` field with
# primary ownership, but the target still should match the file. We can't use
Expand All @@ -798,7 +797,7 @@ def create_live_and_deleted_gets(
)
for secondary_owner_field in secondary_owner_fields:
matching_files.update(
matches_filespec(secondary_owner_field.filespec, paths=sources_set)
*secondary_owner_field.filespec_matcher.matches(list(sources_set))
)
if not matching_files and not (
owners_request.match_if_owning_build_file_included_in_sources
Expand Down
12 changes: 7 additions & 5 deletions src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ from typing import Any, Generic, Iterable, Sequence, TextIO, Tuple, TypeVar, ove

from typing_extensions import Protocol

from pants.engine.fs import PathGlobs
from pants.engine.internals.scheduler import Workunit, _PathGlobsAndRootCollection
from pants.engine.internals.session import SessionValues
from pants.engine.process import InteractiveProcessResult
Expand Down Expand Up @@ -149,16 +148,19 @@ class RemovePrefix:
def __hash__(self) -> int: ...
def __repr__(self) -> str: ...

class FilespecMatcher:
def __init__(self, includes: Sequence[str], excludes: Sequence[str]) -> None: ...
def __eq__(self, other: FilespecMatcher | Any) -> bool: ...
def __hash__(self) -> int: ...
def __repr__(self) -> str: ...
def matches(self, paths: Sequence[str]) -> list[str]: ...

EMPTY_DIGEST: Digest
EMPTY_FILE_DIGEST: FileDigest
EMPTY_SNAPSHOT: Snapshot

def default_cache_path() -> str: ...

# TODO: Really, `paths` should be `Sequence[str]`. Fix and update call sites so that we don't
# cast to `tuple()` when not necessary.
def match_path_globs(path_globs: PathGlobs, paths: tuple[str, ...]) -> str: ...

# ------------------------------------------------------------------------------
# Workunits
# ------------------------------------------------------------------------------
Expand Down
16 changes: 14 additions & 2 deletions src/python/pants/engine/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
)
from pants.engine.unions import UnionMembership, UnionRule, union
from pants.option.global_options import UnmatchedBuildFileGlobs
from pants.source.filespec import Filespec
from pants.source.filespec import Filespec, FilespecMatcher
from pants.util.collections import ensure_list, ensure_str_list
from pants.util.dirutil import fast_relpath
from pants.util.docutil import bin_name, doc_url
Expand Down Expand Up @@ -2071,7 +2071,7 @@ def path_globs(self, unmatched_build_file_globs: UnmatchedBuildFileGlobs) -> Pat
),
)

@property
@memoized_property
def filespec(self) -> Filespec:
"""The original globs, returned in the Filespec dict format.

Expand All @@ -2089,6 +2089,12 @@ def filespec(self) -> Filespec:
result["excludes"] = excludes
return result

@memoized_property
def filespec_matcher(self) -> FilespecMatcher:
# Note: memoized because parsing the globs is expensive:
# https://github.com/pantsbuild/pants/issues/16122
return FilespecMatcher(self.filespec["includes"], self.filespec.get("excludes", []))


class MultipleSourcesField(SourcesField, StringSequenceField):
"""The `sources: list[str]` field.
Expand Down Expand Up @@ -2408,6 +2414,12 @@ def filespec(self) -> Filespec:
the build root.
"""

@memoized_property
def filespec_matcher(self) -> FilespecMatcher:
# Note: memoized because parsing the globs is expensive:
# https://github.com/pantsbuild/pants/issues/16122
return FilespecMatcher(self.filespec["includes"], self.filespec.get("excludes", []))


def targets_with_sources_types(
sources_types: Iterable[type[SourcesField]],
Expand Down
16 changes: 7 additions & 9 deletions src/python/pants/source/filespec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

from typing_extensions import TypedDict

from pants.engine.fs import PathGlobs
from pants.engine.internals import native_engine
from pants.base.deprecated import deprecated
from pants.engine.internals.native_engine import (
FilespecMatcher as FilespecMatcher, # explicit re-export
)


class _IncludesDict(TypedDict, total=True):
Expand All @@ -26,11 +28,7 @@ class Filespec(_IncludesDict, total=False):
excludes: list[str]


@deprecated("2.15.0.dev0", "Use `FilespecMatcher().matches()` instead", start_version="2.14.0.dev5")
def matches_filespec(spec: Filespec, *, paths: Iterable[str]) -> tuple[str, ...]:
include_patterns = spec["includes"]
exclude_patterns = [f"!{e}" for e in spec.get("excludes", [])]
return tuple(
native_engine.match_path_globs(
PathGlobs((*include_patterns, *exclude_patterns)), tuple(paths)
)
)
matcher = FilespecMatcher(spec["includes"], spec.get("excludes", []))
return tuple(matcher.matches(tuple(paths)))
6 changes: 4 additions & 2 deletions src/python/pants/source/filespec_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from pants.engine.fs import PathGlobs, Snapshot
from pants.source.filespec import matches_filespec
from pants.source.filespec import FilespecMatcher, matches_filespec
from pants.testutil.rule_runner import RuleRunner


Expand All @@ -19,7 +19,9 @@ def assert_rule_match(
rule_runner: RuleRunner, glob: str, paths: Tuple[str, ...], *, should_match: bool
) -> None:
# Confirm in-memory behavior.
matched_filespec = matches_filespec({"includes": [glob]}, paths=paths)
matched_filespec = tuple(FilespecMatcher([glob], ()).matches(paths))
deprecated_matched_filespec = matches_filespec({"includes": [glob]}, paths=paths)
assert matched_filespec == deprecated_matched_filespec
if should_match:
assert matched_filespec == paths
else:
Expand Down
Loading