Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to infer assets from strings for Python #14049

Merged
merged 26 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ macos_big_sur_compatibility = true
resolves = { python-default = "3rdparty/python/lockfiles/user_reqs.txt" }
enable_resolves = true

[python-infer]
assets = true

[docformatter]
args = ["--wrap-summaries=100", "--wrap-descriptions=100"]

Expand Down
11 changes: 2 additions & 9 deletions src/python/pants/backend/python/dependency_inference/BUILD
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_sources(
overrides={
"parse_python_imports.py": {
# This Python script is loaded as a resource, see parse_python_imports.py for more info.
"dependencies": ["./scripts:import_parser"]
}
}
)
Comment on lines -4 to -11
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might ask where this has gone. The answer: it's being inferred 🪄

python_sources()

python_tests(
name="tests",
overrides={"parse_python_imports_test.py": {"timeout": 120}},
overrides={"parse_python_dependencies_test.py": {"timeout": 120}},
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pants.backend.python.util_rules.pex_environment import PythonExecutable
from pants.core.util_rules.source_files import SourceFilesRequest
from pants.core.util_rules.stripped_source_files import StrippedSourceFiles
from pants.engine.collection import DeduplicatedCollection
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests
from pants.engine.process import Process, ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, rule
Expand All @@ -31,21 +32,37 @@ class ParsedPythonImports(FrozenDict[str, ParsedPythonImportInfo]):
"""All the discovered imports from a Python source file mapped to the relevant info."""


class ParsedPythonAssets(DeduplicatedCollection[str]):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe ParsedPythonAssetPaths? I continue to confuse myself with what this is, I think because of the old setup of it being a tuple.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. If/when we support pkgutil.get_data we'll be changing this name 😅 but for now that works!

"""All the discovered possible assets from a Python source file."""

# N.B. Don't set `sort_input`, as the input is already sorted


@dataclass(frozen=True)
class ParsedPythonDependencies:
    """The combined result of parsing one Python source file for dependencies.

    Bundles the discovered imports (mapped to per-import info) together with the
    discovered candidate asset paths.
    """

    imports: ParsedPythonImports
    assets: ParsedPythonAssets


@dataclass(frozen=True)
class ParsePythonDependenciesRequest:
    """A request to run the dependency-parser script over a single Python source file."""

    source: PythonSourceField
    interpreter_constraints: InterpreterConstraints
    # Whether to also infer imports from string literals, and the minimum number
    # of dots a string must contain to be treated as a module path.
    string_imports: bool
    string_imports_min_dots: int
    # Whether to also infer assets from string literals, and the minimum number
    # of slashes a string must contain to be treated as an asset path.
    assets: bool
    assets_min_slashes: int


@rule
async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPythonImports:
script = pkgutil.get_data(__name__, "scripts/import_parser.py")
async def parse_python_dependencies(
request: ParsePythonDependenciesRequest,
) -> ParsedPythonDependencies:
script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
assert script is not None
python_interpreter, script_digest, stripped_sources = await MultiGet(
Get(PythonExecutable, InterpreterConstraints, request.interpreter_constraints),
Get(Digest, CreateDigest([FileContent("__parse_python_imports.py", script)])),
Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)])),
Get(StrippedSourceFiles, SourceFilesRequest([request.source])),
)

Expand All @@ -61,23 +78,30 @@ async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPyth
Process(
argv=[
python_interpreter.path,
"./__parse_python_imports.py",
"./__parse_python_dependencies.py",
file,
],
input_digest=input_digest,
description=f"Determine Python imports for {request.source.address}",
description=f"Determine Python dependencies for {request.source.address}",
env={
"STRING_IMPORTS": "y" if request.string_imports else "n",
"MIN_DOTS": str(request.string_imports_min_dots),
"STRING_IMPORTS_MIN_DOTS": str(request.string_imports_min_dots),
"ASSETS": "y" if request.assets else "n",
"ASSETS_MIN_SLASHES": str(request.assets_min_slashes),
},
level=LogLevel.DEBUG,
),
)
# See above for where we explicitly encoded as utf8. Even though utf8 is the
# default for decode(), we make that explicit here for emphasis.
process_output = process_result.stdout.decode("utf8") or "{}"
return ParsedPythonImports(
(imp, ParsedPythonImportInfo(**info)) for imp, info in json.loads(process_output).items()
output = json.loads(process_output)

return ParsedPythonDependencies(
imports=ParsedPythonImports(
(key, ParsedPythonImportInfo(**val)) for key, val in output.get("imports", {}).items()
),
assets=ParsedPythonAssets(output.get("assets", [])),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

import pytest

from pants.backend.python.dependency_inference import parse_python_imports
from pants.backend.python.dependency_inference.parse_python_imports import (
from pants.backend.python.dependency_inference import parse_python_dependencies
from pants.backend.python.dependency_inference.parse_python_dependencies import (
ParsedPythonDependencies,
)
from pants.backend.python.dependency_inference.parse_python_dependencies import (
ParsedPythonImportInfo as ImpInfo,
)
from pants.backend.python.dependency_inference.parse_python_imports import (
ParsedPythonImports,
ParsePythonImportsRequest,
from pants.backend.python.dependency_inference.parse_python_dependencies import (
ParsePythonDependenciesRequest,
)
from pants.backend.python.target_types import PythonSourceField, PythonSourceTarget
from pants.backend.python.util_rules import pex
Expand All @@ -32,24 +34,27 @@
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=[
*parse_python_imports.rules(),
*parse_python_dependencies.rules(),
*stripped_source_files.rules(),
*pex.rules(),
QueryRule(ParsedPythonImports, [ParsePythonImportsRequest]),
QueryRule(ParsedPythonDependencies, [ParsePythonDependenciesRequest]),
],
target_types=[PythonSourceTarget],
)


def assert_imports_parsed(
def assert_deps_parsed(
rule_runner: RuleRunner,
content: str,
*,
expected: dict[str, ImpInfo],
expected_imports: dict[str, ImpInfo] = {},
expected_assets: list[str] = [],
filename: str = "project/foo.py",
constraints: str = ">=3.6",
string_imports: bool = True,
string_imports_min_dots: int = 2,
assets: bool = True,
assets_min_slashes: int = 1,
) -> None:
rule_runner.set_options([], env_inherit={"PATH", "PYENV_ROOT", "HOME"})
rule_runner.write_files(
Expand All @@ -59,18 +64,21 @@ def assert_imports_parsed(
}
)
tgt = rule_runner.get_target(Address("", target_name="t"))
imports = rule_runner.request(
ParsedPythonImports,
result = rule_runner.request(
ParsedPythonDependencies,
[
ParsePythonImportsRequest(
ParsePythonDependenciesRequest(
tgt[PythonSourceField],
InterpreterConstraints([constraints]),
string_imports=string_imports,
string_imports_min_dots=string_imports_min_dots,
assets=assets,
assets_min_slashes=assets_min_slashes,
)
],
)
assert dict(imports) == expected
assert dict(result.imports) == expected_imports
assert list(result.assets) == sorted(expected_assets)


def test_normal_imports(rule_runner: RuleRunner) -> None:
Expand Down Expand Up @@ -108,10 +116,10 @@ def test_normal_imports(rule_runner: RuleRunner) -> None:
from project.circular_dep import CircularDep
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
expected={
expected_imports={
"__future__.print_function": ImpInfo(lineno=1, weak=False),
"os": ImpInfo(lineno=3, weak=False),
"os.path": ImpInfo(lineno=5, weak=False),
Expand Down Expand Up @@ -152,10 +160,10 @@ def test_dunder_import_call(rule_runner: RuleRunner) -> None:
) # pants: no-infer-dep
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
expected={
expected_imports={
"pkg_resources": ImpInfo(lineno=1, weak=False),
"not_ignored_but_looks_like_it_could_be": ImpInfo(lineno=4, weak=False),
"also_not_ignored_but_looks_like_it_could_be": ImpInfo(lineno=10, weak=False),
Expand Down Expand Up @@ -213,10 +221,10 @@ def test_try_except(rule_runner: RuleRunner) -> None:
import strong9
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
expected={
expected_imports={
"strong1": ImpInfo(lineno=1, weak=False),
"weak1": ImpInfo(lineno=4, weak=True),
"weak2": ImpInfo(lineno=7, weak=True),
Expand Down Expand Up @@ -252,11 +260,11 @@ def test_relative_imports(rule_runner: RuleRunner, basename: str) -> None:
)
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
filename=f"project/util/{basename}",
expected={
expected_imports={
"project.util.sibling": ImpInfo(lineno=1, weak=False),
"project.util.sibling.Nibling": ImpInfo(lineno=2, weak=False),
"project.util.subdir.child.Child": ImpInfo(lineno=3, weak=False),
Expand Down Expand Up @@ -316,20 +324,22 @@ def test_imports_from_strings(rule_runner: RuleRunner, min_dots: int) -> None:
}
expected = {sym: info for sym, info in potentially_valid.items() if sym.count(".") >= min_dots}

assert_imports_parsed(rule_runner, content, expected=expected, string_imports_min_dots=min_dots)
assert_imports_parsed(rule_runner, content, string_imports=False, expected={})
assert_deps_parsed(
rule_runner, content, expected_imports=expected, string_imports_min_dots=min_dots
)
assert_deps_parsed(rule_runner, content, string_imports=False, expected_imports={})


def test_real_import_beats_string_import(rule_runner: RuleRunner) -> None:
    # When the same dotted path appears both as a real `import` statement and as
    # a string literal, only the strong (non-weak) import is reported.
    assert_deps_parsed(
        rule_runner,
        "import one.two.three; 'one.two.three'",
        expected_imports={"one.two.three": ImpInfo(lineno=1, weak=False)},
    )


def test_real_import_beats_tryexcept_import(rule_runner: RuleRunner) -> None:
    # An unconditional top-level import outweighs the same import appearing
    # inside a try/except ImportError block: the dependency is reported as
    # strong (weak=False), keyed by the first (strong) occurrence's line.
    assert_deps_parsed(
        rule_runner,
        dedent(
            """\
            import one.two.three
            try: import one.two.three
            except ImportError: pass
            """
        ),
        expected_imports={"one.two.three": ImpInfo(lineno=1, weak=False)},
    )


def test_gracefully_handle_syntax_errors(rule_runner: RuleRunner) -> None:
    # Source that fails to parse must produce no dependencies, not an error.
    assert_deps_parsed(rule_runner, "x =", expected_imports={})


def test_handle_unicode(rule_runner: RuleRunner) -> None:
    # Non-ASCII string literals must not break parsing, and are not inferred
    # as imports or assets.
    assert_deps_parsed(rule_runner, "x = 'äbç'", expected_imports={})


@skip_unless_python27_present
Expand Down Expand Up @@ -376,11 +386,11 @@ def test_works_with_python2(rule_runner: RuleRunner) -> None:
finally: import strong3
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
constraints="==2.7.*",
expected={
expected_imports={
"demo": ImpInfo(lineno=4, weak=False),
"project.demo.Demo": ImpInfo(lineno=5, weak=False),
"pkg_resources": ImpInfo(lineno=7, weak=False),
Expand Down Expand Up @@ -413,11 +423,11 @@ def test_works_with_python38(rule_runner: RuleRunner) -> None:
importlib.import_module("dep.from.str")
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
constraints=">=3.8",
expected={
expected_imports={
"demo": ImpInfo(lineno=5, weak=False),
"project.demo.Demo": ImpInfo(lineno=6, weak=False),
"pkg_resources": ImpInfo(lineno=8, weak=False),
Expand Down Expand Up @@ -446,15 +456,66 @@ def test_works_with_python39(rule_runner: RuleRunner) -> None:
importlib.import_module("dep.from.str")
"""
)
assert_imports_parsed(
assert_deps_parsed(
rule_runner,
content,
constraints=">=3.9",
expected={
expected_imports={
"demo": ImpInfo(lineno=7, weak=False),
"project.demo.Demo": ImpInfo(lineno=8, weak=False),
"pkg_resources": ImpInfo(lineno=10, weak=False),
"treat.as.a.regular.import.not.a.string.import": ImpInfo(lineno=11, weak=False),
"dep.from.str": ImpInfo(lineno=13, weak=True),
},
)


@pytest.mark.parametrize("min_slashes", [1, 2, 3, 4])
def test_assets(rule_runner: RuleRunner, min_slashes: int) -> None:
    """Asset inference reports string literals that look like file paths.

    A string qualifies when it has at least `min_slashes` slashes and ends in a
    file-like `name.ext` segment; disabling the `assets` option yields nothing.
    """
    # Fixed typos in the fixture's embedded comments ("however considers this" ->
    # "however, considers these"; "extenionless" -> "extensionless"). Comments
    # are invisible to AST-based string extraction, so the assertions are
    # unaffected.
    content = dedent(
        """\
        modules = [
            # Potentially valid assets (depending on min_slashes).
            'data/a.json',
            'data/a.txt',
            'data/a.tar.gz',
            'data/subdir1/a.json',
            'data/subdir1/subdir2/a.json',
            'data/subdir1/subdir2/subdir3/a.json',
            '狗/狗.狗',

            # Looks weird, but Unix and pathlib treat repeated "/" as one slash.
            # Our parsing, however, considers these as multiple slashes.
            '//foo.bar',
            '//foo/////bar.txt',

            # Probably invalid assets.
            'noslashes',
            'data/database',  # Unfortunately, extensionless files don't get matched.

            # Definitely invalid assets.
            'a/........',
            '\\n/foo.json',
            'data/a.b/c.d',
            'windows\\style.txt',
        ]
        """
    )

    potentially_valid = {
        "data/a.json",
        "data/a.txt",
        "data/a.tar.gz",
        "data/subdir1/a.json",
        "data/subdir1/subdir2/a.json",
        "data/subdir1/subdir2/subdir3/a.json",
        "狗/狗.狗",
        "//foo.bar",
        "//foo/////bar.txt",
    }
    # Only paths with enough slashes survive the threshold.
    expected = [s for s in potentially_valid if s.count("/") >= min_slashes]

    assert_deps_parsed(
        rule_runner, content, expected_assets=expected, assets_min_slashes=min_slashes
    )
    # With asset inference disabled, nothing is reported regardless of content.
    assert_deps_parsed(rule_runner, content, assets=False, expected_assets=[])
Loading