More cleanup of code for Python before 3.8, fix astroid import (#156)
alexmojaki authored Nov 26, 2024
1 parent eb1e401 commit 89f02d2
Showing 4 changed files with 110 additions and 151 deletions.
1 change: 0 additions & 1 deletion README.rst
@@ -32,7 +32,6 @@ The API Reference is here: http://asttokens.readthedocs.io/en/latest/api-index.h

Usage
-----
ASTTokens works with both Python2 and Python3.

ASTTokens can annotate both trees built by `ast <https://docs.python.org/2/library/ast.html>`_,
AND those built by `astroid <https://github.com/PyCQA/astroid>`_.
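For context, a minimal usage sketch of the annotation the README describes, covering both kinds of trees (illustrative only; assumes asttokens and astroid are installed, and the source snippet is made up):

```python
import ast
import asttokens
import astroid

source = "xs = [x * 2 for x in range(10)]\n"

# Tree built by ast: ASTTokens can parse the source itself.
atok = asttokens.ASTTokens(source, parse=True)
listcomp = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.ListComp))
print(atok.get_text(listcomp))  # [x * 2 for x in range(10)]

# Tree built by astroid: parse separately and hand the tree to ASTTokens.
astroid_tree = astroid.parse(source)
atok_astroid = asttokens.ASTTokens(source, tree=astroid_tree)
```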
30 changes: 8 additions & 22 deletions asttokens/asttokens.py
@@ -104,9 +104,6 @@ class ASTTokens(ASTTextBase):

def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
# type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
# FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
# https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases

super(ASTTokens, self).__init__(source_text, filename)

self._tree = ast.parse(source_text, filename) if parse else tree
@@ -292,9 +289,6 @@ class ASTText(ASTTextBase):
"""
def __init__(self, source_text, tree=None, filename='<unknown>'):
# type: (Any, Optional[Module], str) -> None
# FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
# https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases

super(ASTText, self).__init__(source_text, filename)

self._tree = tree
@@ -327,10 +321,6 @@ def _get_text_positions_tokenless(self, node, padded):
"""
Version of ``get_text_positions()`` that doesn't use tokens.
"""
if sys.version_info[:2] < (3, 8): # pragma: no cover
# This is just for mypy
raise AssertionError("This method should only be called internally after checking supports_tokenless()")

if is_module(node):
# Modules don't have position info, so just return the range of the whole text.
# The token-using method does something different, but its behavior seems weird and inconsistent.
@@ -413,16 +403,14 @@ def get_text_positions(self, node, padded):
return self.asttokens.get_text_positions(node, padded)


# Node types that _get_text_positions_tokenless doesn't support. Only relevant for Python 3.8+.
_unsupported_tokenless_types = () # type: Tuple[str, ...]
if sys.version_info[:2] >= (3, 8):
# no lineno
_unsupported_tokenless_types += ("arguments", "Arguments", "withitem")
if sys.version_info[:2] == (3, 8):
# _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
_unsupported_tokenless_types += ("arg", "Starred")
# no lineno in 3.8
_unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")
# Node types that _get_text_positions_tokenless doesn't support.
# These initial values are missing lineno.
_unsupported_tokenless_types = ("arguments", "Arguments", "withitem") # type: Tuple[str, ...]
if sys.version_info[:2] == (3, 8):
# _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
_unsupported_tokenless_types += ("arg", "Starred")
# no lineno in 3.8
_unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")


def supports_tokenless(node=None):
@@ -434,7 +422,6 @@ def supports_tokenless(node=None):
The following cases are not supported:
- Python 3.7 and earlier
- PyPy
- ``ast.arguments`` / ``astroid.Arguments``
- ``ast.withitem``
@@ -459,6 +446,5 @@ def supports_tokenless(node=None):
)
)
)
and sys.version_info[:2] >= (3, 8)
and 'pypy' not in sys.version.lower()
)
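To illustrate the check implemented above, a minimal sketch (assumes asttokens is installed; the output commentary applies to CPython):

```python
import ast
from asttokens.asttokens import supports_tokenless

# supports_tokenless() reports whether a node's text can be recovered from
# its position attributes alone, without a token stream.
tree = ast.parse("def f(a, b=1):\n    return a + b\n")
for node in ast.walk(tree):
    print(type(node).__name__, supports_tokenless(node))
# 'arguments' prints False because it carries no lineno; most other node
# types, e.g. 'FunctionDef' and 'BinOp', print True on CPython 3.8+.
```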
47 changes: 14 additions & 33 deletions asttokens/mark_tokens.py
@@ -21,7 +21,6 @@

from . import util
from .asttokens import ASTTokens
from .util import AstConstant
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer

if TYPE_CHECKING:
@@ -177,13 +176,6 @@ def handle_comp(self, open_brace, node, first_token, last_token):
util.expect_token(before, token.OP, open_brace)
return (before, last_token)

# Python 3.8 fixed the starting position of list comprehensions:
# https://bugs.python.org/issue31241
if sys.version_info < (3, 8):
def visit_listcomp(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_comp('[', node, first_token, last_token)

def visit_comprehension(self,
node, # type: AstNode
first_token, # type: util.Token
@@ -296,26 +288,19 @@ def handle_bare_tuple(self, node, first_token, last_token):
last_token = maybe_comma
return (first_token, last_token)

if sys.version_info >= (3, 8):
# In Python3.8 parsed tuples include parentheses when present.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
# It's a bare tuple if the first token belongs to the first child. The first child may
# include extraneous parentheses (which don't create new nodes), so account for those too.
child = node.elts[0]
if TYPE_CHECKING:
child = cast(AstNode, child)
child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
if first_token == child_first:
return self.handle_bare_tuple(node, first_token, last_token)
return (first_token, last_token)
else:
# Before python 3.8, parsed tuples do not include parens.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
(first_token, last_token) = self.handle_bare_tuple(node, first_token, last_token)
return self._gobble_parens(first_token, last_token, False)
# In Python3.8 parsed tuples include parentheses when present.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
# It's a bare tuple if the first token belongs to the first child. The first child may
# include extraneous parentheses (which don't create new nodes), so account for those too.
child = node.elts[0]
if TYPE_CHECKING:
child = cast(AstNode, child)
child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
if first_token == child_first:
return self.handle_bare_tuple(node, first_token, last_token)
return (first_token, last_token)
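A small sketch of the behaviour handled above, separate from the changed file: on Python 3.8+ the reported text of a parenthesized tuple includes the parentheses, while a bare tuple's does not (assumes asttokens is installed):

```python
import ast
import asttokens

source = "a = (1, 2)\nb = 1, 2\n"
atok = asttokens.ASTTokens(source, parse=True)
tuples = [n for n in ast.walk(atok.tree) if isinstance(n, ast.Tuple)]
print([atok.get_text(t) for t in tuples])  # ['(1, 2)', '1, 2']
```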

def visit_tuple(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
@@ -417,19 +402,15 @@ def visit_num(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)

# In Astroid, the Num and Str nodes are replaced by Const.
def visit_const(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, AstConstant) or isinstance(node, nc.Const)
assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
if isinstance(node.value, numbers.Number):
return self.handle_num(node, node.value, first_token, last_token)
elif isinstance(node.value, (str, bytes)):
return self.visit_str(node, first_token, last_token)
return (first_token, last_token)

# In Python >= 3.6, there is a similar class 'Constant' for literals
# In 3.8 it became the type produced by ast.parse
# https://bugs.python.org/issue32892
visit_constant = visit_const

def visit_keyword(self, node, first_token, last_token):
183 changes: 88 additions & 95 deletions asttokens/util.py
@@ -20,10 +20,21 @@
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING

import astroid

from functools import lru_cache
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
Any,
TYPE_CHECKING,
Type,
)

if TYPE_CHECKING: # pragma: no cover
from .astroid_compat import NodeNG
@@ -67,13 +78,6 @@ def __str__(self):
return token_repr(self.type, self.string)


if sys.version_info >= (3, 6):
AstConstant = ast.Constant
else:
class AstConstant:
value = object()


def match_token(token, tok_type, tok_str=None):
# type: (Token, int, Optional[str]) -> bool
"""Returns true if token is of the given type and, if a string is given, has that string."""
@@ -91,22 +95,13 @@ def expect_token(token, tok_type, tok_str=None):
token_repr(tok_type, tok_str), str(token),
token.start[0], token.start[1] + 1))

# These were previously defined in tokenize.py and distinguishable by being greater than
# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly.
if sys.version_info >= (3, 7):
def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type in (token.NL, token.COMMENT, token.ENCODING)
else:
def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type >= token.N_TOKENS

def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type in (token.NL, token.COMMENT, token.ENCODING)
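For a quick illustration of what counts as non-coding, a standard-library-only sketch:

```python
import io
import token
import tokenize

source = "x = 1  # a comment\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    non_coding = tok.type in (token.NL, token.COMMENT, token.ENCODING)
    print(token.tok_name[tok.type], non_coding)
# COMMENT and NL are produced by the tokenizer but have no counterpart
# in the syntax tree, which is why they are skipped when marking tokens.
```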


def generate_tokens(text):
@@ -201,10 +196,19 @@ def is_expr_stmt(node):
return node.__class__.__name__ == 'Expr'



CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
try:
from astroid import Const
CONSTANT_CLASSES += (Const,)
except ImportError: # pragma: no cover
# astroid is not available
pass

def is_constant(node):
# type: (AstNode) -> bool
"""Returns whether node is a Constant node."""
return isinstance(node, (ast.Constant, astroid.Const))
return isinstance(node, CONSTANT_CLASSES)
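A brief sketch of the effect of the guarded import above (astroid is optional at runtime):

```python
import ast
from asttokens.util import is_constant

node = ast.parse("42").body[0].value  # an ast.Constant
print(is_constant(node))              # True whether or not astroid is installed

try:
    import astroid
except ImportError:
    pass
else:
    # With astroid available, its Const nodes are recognised as well.
    print(is_constant(astroid.extract_node("42")))  # True
```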


def is_ellipsis(node):
@@ -421,72 +425,61 @@ def last_stmt(node):
return node


if sys.version_info[:2] >= (3, 8):
from functools import lru_cache

@lru_cache(maxsize=None)
def fstring_positions_work():
# type: () -> bool
"""
The positions attached to nodes inside f-string FormattedValues have some bugs
that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
This checks for those bugs more concretely without relying on the Python version.
Specifically this checks:
- Values with a format spec or conversion
- Repeated (i.e. identical-looking) expressions
- f-strings implicitly concatenated over multiple lines.
- Multiline, triple-quoted f-strings.
"""
source = """(
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
f'''
{s} {t}
{u} {v}
'''
)"""
tree = ast.parse(source)
name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
positions_are_unique = len(set(name_positions)) == len(name_positions)
correct_source_segments = all(
ast.get_source_segment(source, node) == node.id
for node in name_nodes
)
return positions_are_unique and correct_source_segments
@lru_cache(maxsize=None)
def fstring_positions_work():
# type: () -> bool
"""
The positions attached to nodes inside f-string FormattedValues have some bugs
that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
This checks for those bugs more concretely without relying on the Python version.
Specifically this checks:
- Values with a format spec or conversion
- Repeated (i.e. identical-looking) expressions
- f-strings implicitly concatenated over multiple lines.
- Multiline, triple-quoted f-strings.
"""
source = """(
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
f'''
{s} {t}
{u} {v}
'''
)"""
tree = ast.parse(source)
name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
positions_are_unique = len(set(name_positions)) == len(name_positions)
correct_source_segments = all(
ast.get_source_segment(source, node) == node.id
for node in name_nodes
)
return positions_are_unique and correct_source_segments
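To make the probe in fstring_positions_work() concrete, a minimal standard-library sketch of the same idea:

```python
import ast

# Repeated expressions and a nested format spec, as in the probe above.
source = '(f"{b}{b} {c!r} {d:>{w}}")'
tree = ast.parse(source)
names = [n for n in ast.walk(tree) if isinstance(n, ast.Name)]
unique = len({(n.lineno, n.col_offset) for n in names}) == len(names)
segments_ok = all(ast.get_source_segment(source, n) == n.id for n in names)
print(unique and segments_ok)  # False on interpreters hit by the pre-3.9.7 bugs
```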

def annotate_fstring_nodes(tree):
# type: (ast.AST) -> None
"""
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree, include_joined_str=True):
if not isinstance(joinedstr, ast.JoinedStr):
continue
for part in joinedstr.values:
# The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
setattr(part, '_broken_positions', True) # use setattr for mypy

if isinstance(part, ast.FormattedValue):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
setattr(part.format_spec, '_broken_positions', True)

else:
def fstring_positions_work():
# type: () -> bool
return False

def annotate_fstring_nodes(_tree):
# type: (ast.AST) -> None
pass
def annotate_fstring_nodes(tree):
# type: (ast.AST) -> None
"""
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree, include_joined_str=True):
if not isinstance(joinedstr, ast.JoinedStr):
continue
for part in joinedstr.values:
# The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
setattr(part, '_broken_positions', True) # use setattr for mypy

if isinstance(part, ast.FormattedValue):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
setattr(part.format_spec, '_broken_positions', True)
