From ea542ac164fb73459f7a7a90aff9c6646d2213e6 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 7 Oct 2020 20:27:24 -0700 Subject: [PATCH] use libCST to parse imports from python 3 code (#10907) ### Problem Fixes #10894. ### Solution - Add `libcst>=0.3.12,<0.4` to `requirements.txt`. - Remove the usage of the normal python 3 `ast` library from `import_parser.py`, and implement a `_CSTVisitor` to visit import nodes. - When using python 3, instead use the `_CSTVisitor`. - We use python 3.8 to parse all python 3 code, which is 100% correct for the purpose of parsing imports. ### Result We should no longer be dependent on the version of the current python executable to parse python code for dependencies! --- 3rdparty/python/constraints.txt | 10 +- 3rdparty/python/requirements.txt | 1 + .../dependency_inference/import_parser.py | 153 ++++++++++++------ .../import_parser_test.py | 49 +++++- 4 files changed, 149 insertions(+), 64 deletions(-) diff --git a/3rdparty/python/constraints.txt b/3rdparty/python/constraints.txt index 497a23e6a6c..fbd0d3542ce 100644 --- a/3rdparty/python/constraints.txt +++ b/3rdparty/python/constraints.txt @@ -1,4 +1,4 @@ -# Generated by build-support/bin/generate_lockfile.sh on Mon Sep 28 09:49:00 PDT 2020 +# Generated by build-support/bin/generate_lockfile.sh on Wed Oct 7 12:00:29 PDT 2020 ansicolors==1.1.8 attrs==20.2.0 beautifulsoup4==4.6.3 @@ -11,14 +11,15 @@ fasteners==0.15 idna==2.10 importlib-metadata==2.0.0 iniconfig==1.0.1 +libcst==0.3.12 monotonic==1.5 more-itertools==8.5.0 mypy==0.782 mypy-extensions==0.4.3 packaging==20.4 pathspec==0.8.0 -pex==2.1.16 -pip==19.0.3 +pex==2.1.17 +pip==18.1 pluggy==0.13.1 psutil==5.7.0 py==1.9.0 @@ -35,6 +36,7 @@ six==1.15.0 toml==0.10.1 typed-ast==1.4.1 typing-extensions==3.7.4.2 +typing-inspect==0.6.0 urllib3==1.25.10 www-authenticate==0.9.2 -zipp==3.2.0 +zipp==3.3.0 diff --git a/3rdparty/python/requirements.txt b/3rdparty/python/requirements.txt index e0aa9d09854..8252dc58ee0 100644 --- a/3rdparty/python/requirements.txt +++ b/3rdparty/python/requirements.txt @@ -2,6 +2,7 @@ ansicolors==1.1.8 beautifulsoup4>=4.6.0,<4.7 dataclasses==0.6 fasteners==0.15.0 +libcst>=0.3.12,<0.4 # The MyPy requirement should be maintained in lockstep with the requirement the Pants repo uses # for the mypy task since it configures custom MyPy plugins. That requirement can be found via: diff --git a/src/python/pants/backend/python/dependency_inference/import_parser.py b/src/python/pants/backend/python/dependency_inference/import_parser.py index b00e24899cb..dd303c149aa 100644 --- a/src/python/pants/backend/python/dependency_inference/import_parser.py +++ b/src/python/pants/backend/python/dependency_inference/import_parser.py @@ -1,21 +1,20 @@ # Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). -import ast as ast3 +import logging import re -import sys from dataclasses import dataclass -from typing import Optional, Set, Tuple +from typing import Optional, Set, Union +import libcst as cst from typed_ast import ast27 +from typing_extensions import Protocol from pants.util.memo import memoized_property from pants.util.ordered_set import FrozenOrderedSet from pants.util.strutil import ensure_text - -class ImportParseError(ValueError): - pass +logger = logging.getLogger(__name__) @dataclass(frozen=True) @@ -34,41 +33,56 @@ class ParsedPythonImports: def all_imports(self) -> FrozenOrderedSet[str]: return FrozenOrderedSet(sorted([*self.explicit_imports, *self.inferred_imports])) + @classmethod + def empty(cls) -> "ParsedPythonImports": + return cls(FrozenOrderedSet(), FrozenOrderedSet()) + + +class VisitorInterface(Protocol): + explicit_imports: Set[str] + inferred_imports: Set[str] -def parse_file(*, filename: str, content: str) -> Optional[Tuple]: + +def parse_file(*, filename: str, content: str, module_name: str) -> Optional[VisitorInterface]: + """Parse the file for python imports, and return a visitor with the imports it found.""" + # Parse all python 3 code with libCST. We parse assuming python 3 goes first, because we assume + # most user code will be python 3. + # TODO(#10921): identify the appropriate interpreter version beforehand! try: - # NB: The Python 3 ast is generally backwards-compatible with earlier versions. The only - # breaking change is `async` `await` becoming reserved keywords in Python 3.7 (deprecated - # in 3.6). If the std-lib fails to parse, we could use typed-ast to try parsing with a - # target version of Python 3.5, but we don't because Python 3.5 is almost EOL and has very - # low usage. - # We will also fail to parse Python 3.8 syntax if Pants is run with Python 3.6 or 3.7. - # There is no known workaround for this, beyond users changing their `./pants` script to - # always use >= 3.8. - tree = ast3.parse(content, filename=filename) - visitor_cls = _Py3AstVisitor if sys.version_info[:2] < (3, 8) else _Py38AstVisitor - return tree, visitor_cls - except SyntaxError: - try: - py27_tree = ast27.parse(content, filename=filename) - return py27_tree, _Py27AstVisitor - except SyntaxError: - return None + # NB: Since all python 3 code is forwards-compatible with the 3.8 parser, and the import + # syntax remains unchanged, we are safely able to use the 3.8 parser for parsing imports. + # TODO(#10922): Support parsing python 3.9/3.10 with libCST! + config = cst.PartialParserConfig(python_version="3.8") + cst_tree = cst.parse_module(content, config=config) + completed_cst_visitor = _CSTVisitor.visit_tree(cst_tree, module_name=module_name) + return completed_cst_visitor + except cst.ParserSyntaxError as e: + # NB: When the python 3 ast visitor fails to parse python 2 syntax, it raises a + # ParserSyntaxError. This may also occur when the file contains invalid python code. If we + # successfully parse a python 2 file with a python 3 parser, that should not change the + # imports we calculate. + logger.debug(f"Failed to parse {filename} with python 3.8 libCST parser: {e}") + + try: + py27_tree = ast27.parse(content, filename=filename) + completed_ast27_visitor = _Py27AstVisitor.visit_tree(py27_tree, module_name=module_name) + return completed_ast27_visitor + except SyntaxError as e: + logger.debug(f"Failed to parse {filename} with python 2.7 typed-ast parser: {e}") + + return None def find_python_imports(*, filename: str, content: str, module_name: str) -> ParsedPythonImports: - parse_result = parse_file(filename=filename, content=content) + completed_visitor = parse_file(filename=filename, content=content, module_name=module_name) # If there were syntax errors, gracefully early return. This is more user friendly than # propagating the exception. Dependency inference simply won't be used for that file, and # it'll be up to the tool actually being run (e.g. Pytest or Flake8) to error. - if parse_result is None: - return ParsedPythonImports(FrozenOrderedSet(), FrozenOrderedSet()) - tree, ast_visitor_cls = parse_result - ast_visitor = ast_visitor_cls(module_name) - ast_visitor.visit(tree) + if completed_visitor is None: + return ParsedPythonImports.empty() return ParsedPythonImports( - explicit_imports=FrozenOrderedSet(sorted(ast_visitor.explicit_imports)), - inferred_imports=FrozenOrderedSet(sorted(ast_visitor.inferred_imports)), + explicit_imports=FrozenOrderedSet(sorted(completed_visitor.explicit_imports)), + inferred_imports=FrozenOrderedSet(sorted(completed_visitor.inferred_imports)), ) @@ -77,42 +91,77 @@ def find_python_imports(*, filename: str, content: str, module_name: str) -> Par _INFERRED_IMPORT_REGEX = re.compile(r"^([a-z_][a-z_\d]*\.){2,}[a-zA-Z_]\w*$") -class _BaseAstVisitor: +class _Py27AstVisitor(ast27.NodeVisitor): def __init__(self, module_name: str) -> None: self._module_parts = module_name.split(".") self.explicit_imports: Set[str] = set() self.inferred_imports: Set[str] = set() - def maybe_add_inferred_import(self, s: str) -> None: + @classmethod + def visit_tree(cls, tree: ast27.AST, module_name: str) -> "_Py27AstVisitor": + visitor = cls(module_name) + visitor.visit(tree) + return visitor + + def _maybe_add_inferred_import(self, s: str) -> None: if _INFERRED_IMPORT_REGEX.match(s): self.inferred_imports.add(s) - def visit_Import(self, node) -> None: + def visit_Import(self, node: ast27.Import) -> None: for alias in node.names: self.explicit_imports.add(alias.name) - def visit_ImportFrom(self, node) -> None: - rel_module = node.module - abs_module = ".".join( - self._module_parts[0 : -node.level] + ([] if rel_module is None else [rel_module]) - ) + def visit_ImportFrom(self, node: ast27.ImportFrom) -> None: + rel_module = [] if node.module is None else [node.module] + relative_level = 0 if node.level is None else node.level + abs_module = ".".join(self._module_parts[0:-relative_level] + rel_module) for alias in node.names: self.explicit_imports.add(f"{abs_module}.{alias.name}") - -class _Py27AstVisitor(ast27.NodeVisitor, _BaseAstVisitor): - def visit_Str(self, node) -> None: + def visit_Str(self, node: ast27.Str) -> None: val = ensure_text(node.s) - self.maybe_add_inferred_import(val) + self._maybe_add_inferred_import(val) -class _Py3AstVisitor(ast3.NodeVisitor, _BaseAstVisitor): - def visit_Str(self, node) -> None: - self.maybe_add_inferred_import(node.s) +class _CSTVisitor(cst.CSTVisitor): + def __init__(self, module_name: str) -> None: + self._module_parts = module_name.split(".") + self.explicit_imports: Set[str] = set() + self.inferred_imports: Set[str] = set() + @classmethod + def visit_tree(cls, tree: cst.Module, module_name: str) -> "_CSTVisitor": + visitor = cls(module_name) + tree.visit(visitor) + return visitor -class _Py38AstVisitor(ast3.NodeVisitor, _BaseAstVisitor): - # Python 3.8 deprecated the Str node in favor of Constant. - def visit_Constant(self, node) -> None: - if isinstance(node.value, str): - self.maybe_add_inferred_import(node.value) + def _maybe_add_inferred_import(self, s: Union[str, bytes]) -> None: + if isinstance(s, bytes): + return + if _INFERRED_IMPORT_REGEX.match(s): + self.inferred_imports.add(s) + + def _flatten_attribute_or_name(self, node: cst.BaseExpression) -> str: + if isinstance(node, cst.Name): + return node.value + if not isinstance(node, cst.Attribute): + raise TypeError(f"Unrecognized cst.BaseExpression subclass: {node}") + inner = self._flatten_attribute_or_name(node.value) + return f"{inner}.{node.attr.value}" + + def visit_Import(self, node: cst.Import) -> None: + for alias in node.names: + self.explicit_imports.add(self._flatten_attribute_or_name(alias.name)) + + def visit_ImportFrom(self, node: cst.ImportFrom) -> None: + rel_module = [] if node.module is None else [self._flatten_attribute_or_name(node.module)] + relative_level = len(node.relative) + abs_module = ".".join(self._module_parts[0:-relative_level] + rel_module) + if isinstance(node.names, cst.ImportStar): + self.explicit_imports.add(f"{abs_module}.*") + else: + for alias in node.names: + self.explicit_imports.add(f"{abs_module}.{alias.name.value}") + + def visit_SimpleString(self, node: cst.SimpleString) -> None: + self._maybe_add_inferred_import(node.evaluated_value) diff --git a/src/python/pants/backend/python/dependency_inference/import_parser_test.py b/src/python/pants/backend/python/dependency_inference/import_parser_test.py index 25ab75802af..87113176e6a 100644 --- a/src/python/pants/backend/python/dependency_inference/import_parser_test.py +++ b/src/python/pants/backend/python/dependency_inference/import_parser_test.py @@ -1,12 +1,13 @@ # Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). -import sys +import logging from textwrap import dedent -import pytest - -from pants.backend.python.dependency_inference.import_parser import find_python_imports +from pants.backend.python.dependency_inference.import_parser import ( + ParsedPythonImports, + find_python_imports, +) def test_normal_imports() -> None: @@ -146,10 +147,6 @@ def test_works_with_python2() -> None: assert set(imports.inferred_imports) == {"dep.from.bytes", "dep.from.str"} -@pytest.mark.skipif( - sys.version_info[:2] < (3, 8), - reason="Cannot parse Python 3.8 unless Pants is run with Python 3.8.", -) def test_works_with_python38() -> None: imports = find_python_imports( filename="foo.py", @@ -169,3 +166,39 @@ def test_works_with_python38() -> None: ) assert set(imports.explicit_imports) == {"demo", "project.demo.Demo"} assert set(imports.inferred_imports) == {"dep.from.str"} + + +def test_debug_log_with_python39(caplog) -> None: + """We can't parse python 3.9 yet, so we want to be able to provide good debug log.""" + caplog.set_level(logging.DEBUG) + imports = find_python_imports( + filename="foo.py", + # See https://www.python.org/dev/peps/pep-0614/ for the newly relaxed decorator expressions. + content=dedent( + """\ + @buttons[0].clicked.connect + def spam(): + ... + """ + ), + module_name="project.app", + ) + assert imports == ParsedPythonImports.empty() + assert len(caplog.records) == 2 + messages = [rec.getMessage() for rec in caplog.records] + + cst_parse_error = dedent( + """\ + Failed to parse foo.py with python 3.8 libCST parser: Syntax Error @ 1:9. + Incomplete input. Encountered '[', but expected '(', or 'NEWLINE'. + + @buttons[0].clicked.connect + ^""" + ) + assert cst_parse_error == messages[0] + + ast27_parse_error = dedent( + """\ + Failed to parse foo.py with python 2.7 typed-ast parser: invalid syntax (foo.py, line 1)""" + ) + assert ast27_parse_error == messages[1]