Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use libCST to parse imports from python 3 code #10907

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions 3rdparty/python/constraints.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by build-support/bin/generate_lockfile.sh on Mon Sep 28 09:49:00 PDT 2020
# Generated by build-support/bin/generate_lockfile.sh on Mon Oct 5 00:17:05 PDT 2020
ansicolors==1.1.8
attrs==20.2.0
beautifulsoup4==4.6.3
Expand All @@ -11,14 +11,15 @@ fasteners==0.15
idna==2.10
importlib-metadata==2.0.0
iniconfig==1.0.1
libcst==0.3.12
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
monotonic==1.5
more-itertools==8.5.0
mypy==0.782
mypy-extensions==0.4.3
packaging==20.4
pathspec==0.8.0
pex==2.1.16
pip==19.0.3
pex==2.1.17
pip==18.1
pluggy==0.13.1
psutil==5.7.0
py==1.9.0
Expand All @@ -35,6 +36,7 @@ six==1.15.0
toml==0.10.1
typed-ast==1.4.1
typing-extensions==3.7.4.2
typing-inspect==0.6.0
urllib3==1.25.10
www-authenticate==0.9.2
zipp==3.2.0
zipp==3.3.0
1 change: 1 addition & 0 deletions 3rdparty/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ ansicolors==1.1.8
beautifulsoup4>=4.6.0,<4.7
dataclasses==0.6
fasteners==0.15.0
libcst==0.3.12
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved

# The MyPy requirement should be maintained in lockstep with the requirement the Pants repo uses
# for the mypy task since it configures custom MyPy plugins. That requirement can be found via:
Expand Down
129 changes: 89 additions & 40 deletions src/python/pants/backend/python/dependency_inference/import_parser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import ast as ast3
import re
import sys
from dataclasses import dataclass
from typing import Optional, Set, Tuple
from typing import Any, Optional, Set, Type, TypeVar, Union

import libcst as cst
from typed_ast import ast27

from pants.util.memo import memoized_property
Expand Down Expand Up @@ -35,40 +34,55 @@ def all_imports(self) -> FrozenOrderedSet[str]:
return FrozenOrderedSet(sorted([*self.explicit_imports, *self.inferred_imports]))


def parse_file(*, filename: str, content: str) -> Optional[Tuple]:
_Visitor = TypeVar("_Visitor")


class VisitorInterface:
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
explicit_imports: Set[str]
inferred_imports: Set[str]

def __init__(self, module_name: str) -> None:
...

@classmethod
def visit_tree(cls: Type[_Visitor], tree: Any, module_name: str) -> _Visitor:
...


def parse_file(*, filename: str, content: str, module_name: str) -> VisitorInterface:
"""Parse the file for python imports, and return a visitor with the imports it found."""
try:
# NB: The Python 3 ast is generally backwards-compatible with earlier versions. The only
# breaking change is `async` `await` becoming reserved keywords in Python 3.7 (deprecated
# in 3.6). If the std-lib fails to parse, we could use typed-ast to try parsing with a
# target version of Python 3.5, but we don't because Python 3.5 is almost EOL and has very
# low usage.
# We will also fail to parse Python 3.8 syntax if Pants is run with Python 3.6 or 3.7.
# There is no known workaround for this, beyond users changing their `./pants` script to
# always use >= 3.8.
tree = ast3.parse(content, filename=filename)
visitor_cls = _Py3AstVisitor if sys.version_info[:2] < (3, 8) else _Py38AstVisitor
return tree, visitor_cls
py27_tree = ast27.parse(content, filename=filename)
Eric-Arellano marked this conversation as resolved.
Show resolved Hide resolved
completed_visitor = _Py27AstVisitor.visit_tree(py27_tree, module_name=module_name)
return completed_visitor
except SyntaxError:
try:
py27_tree = ast27.parse(content, filename=filename)
return py27_tree, _Py27AstVisitor
except SyntaxError:
return None
# NB: When the python 2 ast visitor fails to parse python 3 syntax, it raises a
# SyntaxError. This may also occur when the file contains invalid python code. If we parse a
# python 3 file with a python 2 parser, that should not change the imports we calculate.
pass

# Parse all python 3 code with libCST.
# NB: Since all python 3 code is forwards-compatible with the 3.8 parser, and the import syntax
# remains unchanged, we are safely able to use the 3.8 parser for parsing imports.
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
config = cst.PartialParserConfig(python_version="3.8")
cst_tree = cst.parse_module(content, config=config)
# NB: If the file contains invalid python code, it will raise an exception we expect to be
# caught.
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
completed_visitor = _CSTVisitor.visit_tree(cst_tree, module_name=module_name)
return completed_visitor


def find_python_imports(*, filename: str, content: str, module_name: str) -> ParsedPythonImports:
parse_result = parse_file(filename=filename, content=content)
# If there were syntax errors, gracefully early return. This is more user friendly than
# propagating the exception. Dependency inference simply won't be used for that file, and
# it'll be up to the tool actually being run (e.g. Pytest or Flake8) to error.
if parse_result is None:
try:
completed_visitor = parse_file(filename=filename, content=content, module_name=module_name)
except cst.ParserSyntaxError:
return ParsedPythonImports(FrozenOrderedSet(), FrozenOrderedSet())
tree, ast_visitor_cls = parse_result
ast_visitor = ast_visitor_cls(module_name)
ast_visitor.visit(tree)
return ParsedPythonImports(
explicit_imports=FrozenOrderedSet(sorted(ast_visitor.explicit_imports)),
inferred_imports=FrozenOrderedSet(sorted(ast_visitor.inferred_imports)),
explicit_imports=FrozenOrderedSet(sorted(completed_visitor.explicit_imports)),
inferred_imports=FrozenOrderedSet(sorted(completed_visitor.inferred_imports)),
)


Expand All @@ -77,13 +91,19 @@ def find_python_imports(*, filename: str, content: str, module_name: str) -> Par
_INFERRED_IMPORT_REGEX = re.compile(r"^([a-z_][a-z_\d]*\.){2,}[a-zA-Z_]\w*$")


class _BaseAstVisitor:
class _Py27AstVisitor(ast27.NodeVisitor, VisitorInterface):
def __init__(self, module_name: str) -> None:
self._module_parts = module_name.split(".")
self.explicit_imports: Set[str] = set()
self.inferred_imports: Set[str] = set()

def maybe_add_inferred_import(self, s: str) -> None:
@classmethod
def visit_tree(cls: Type[_Visitor], tree: Any, module_name: str) -> _Visitor:
visitor = cls(module_name)
visitor.visit(tree)
return visitor

def _maybe_add_inferred_import(self, s: str) -> None:
if _INFERRED_IMPORT_REGEX.match(s):
self.inferred_imports.add(s)

Expand All @@ -99,20 +119,49 @@ def visit_ImportFrom(self, node) -> None:
for alias in node.names:
self.explicit_imports.add(f"{abs_module}.{alias.name}")


class _Py27AstVisitor(ast27.NodeVisitor, _BaseAstVisitor):
def visit_Str(self, node) -> None:
val = ensure_text(node.s)
self.maybe_add_inferred_import(val)
self._maybe_add_inferred_import(val)


class _Py3AstVisitor(ast3.NodeVisitor, _BaseAstVisitor):
def visit_Str(self, node) -> None:
self.maybe_add_inferred_import(node.s)
class _CSTVisitor(cst.CSTVisitor, VisitorInterface):
def __init__(self, module_name: str) -> None:
self._module_parts = module_name.split(".")
self.explicit_imports: Set[str] = set()
self.inferred_imports: Set[str] = set()

@classmethod
def visit_tree(cls: Type[_Visitor], tree: Any, module_name: str) -> _Visitor:
visitor = cls(module_name)
tree.visit(visitor)
return visitor

def _maybe_add_inferred_import(self, s: Union[str, bytes]) -> None:
if isinstance(s, bytes):
return
Comment on lines +139 to +140
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because SimpleString includes bytes, I presume?

Copy link
Contributor Author

@cosmicexplorer cosmicexplorer Oct 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, although it's not explicitly mentioned in the documentation: https://libcst.readthedocs.io/en/latest/nodes.html?highlight=import#libcst.SimpleString.

With pex:

>>> cst.parse_expression('b"asdf"').evaluated_value
b'asdf'
>>> type(cst.parse_expression('b"asdf"'))
<class 'libcst._nodes.expression.SimpleString'>
>>> cst.parse_expression('"asdf"').evaluated_value
'asdf'
>>> type(cst.parse_expression('"asdf"'))
<class 'libcst._nodes.expression.SimpleString'>

if _INFERRED_IMPORT_REGEX.match(s):
self.inferred_imports.add(s)

def _flatten_attribute_or_name(self, node: Optional[Union[cst.Attribute, cst.Name]]) -> str:
if node is None:
return ""
if isinstance(node, cst.Name):
return node.value
inner = self._flatten_attribute_or_name(node.value)
return f"{inner}.{node.attr.value}"

def visit_Import(self, node) -> None:
for alias in node.names:
self.explicit_imports.add(self._flatten_attribute_or_name(alias.name))

def visit_ImportFrom(self, node) -> None:
rel_module = self._flatten_attribute_or_name(node.module)
abs_module = ".".join(
self._module_parts[0 : -len(node.relative)]
+ ([] if rel_module is None else [rel_module])
)
for alias in node.names:
self.explicit_imports.add(f"{abs_module}.{alias.name.value}")

class _Py38AstVisitor(ast3.NodeVisitor, _BaseAstVisitor):
# Python 3.8 deprecated the Str node in favor of Constant.
def visit_Constant(self, node) -> None:
if isinstance(node.value, str):
self.maybe_add_inferred_import(node.value)
def visit_SimpleString(self, node) -> None:
self._maybe_add_inferred_import(node.evaluated_value)
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,6 @@ def test_works_with_python2() -> None:
assert set(imports.inferred_imports) == {"dep.from.bytes", "dep.from.str"}


@pytest.mark.skipif(
sys.version_info[:2] < (3, 8),
reason="Cannot parse Python 3.8 unless Pants is run with Python 3.8.",
)
def test_works_with_python38() -> None:
imports = find_python_imports(
filename="foo.py",
Expand Down