More cleanup of code for Python before 3.8, fix astroid import (#156)
alexmojaki authored Nov 26, 2024
1 parent eb1e401 commit 89f02d2
Showing 4 changed files with 110 additions and 151 deletions.
1 change: 0 additions & 1 deletion README.rst
@@ -32,7 +32,6 @@ The API Reference is here: http://asttokens.readthedocs.io/en/latest/api-index.h

Usage
-----
ASTTokens works with both Python2 and Python3.

ASTTokens can annotate both trees built by `ast <https://docs.python.org/2/library/ast.html>`_,
AND those built by `astroid <https://github.com/PyCQA/astroid>`_.
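For context, a minimal usage sketch of the annotation the README describes, covering both kinds of trees (illustrative only; assumes asttokens and astroid are installed, and the source snippet is made up):

```python
import ast
import asttokens
import astroid

source = "xs = [x * 2 for x in range(10)]\n"

# Tree built by ast: ASTTokens can parse the source itself.
atok = asttokens.ASTTokens(source, parse=True)
listcomp = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.ListComp))
print(atok.get_text(listcomp))  # [x * 2 for x in range(10)]

# Tree built by astroid: parse separately and hand the tree to ASTTokens.
astroid_tree = astroid.parse(source)
atok_astroid = asttokens.ASTTokens(source, tree=astroid_tree)
```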
30 changes: 8 additions & 22 deletions asttokens/asttokens.py
@@ -104,9 +104,6 @@ class ASTTokens(ASTTextBase):

def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
# type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
# FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
# https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases

super(ASTTokens, self).__init__(source_text, filename)

self._tree = ast.parse(source_text, filename) if parse else tree
@@ -292,9 +289,6 @@ class ASTText(ASTTextBase):
"""
def __init__(self, source_text, tree=None, filename='<unknown>'):
# type: (Any, Optional[Module], str) -> None
# FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
# https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases

super(ASTText, self).__init__(source_text, filename)

self._tree = tree
@@ -327,10 +321,6 @@ def _get_text_positions_tokenless(self, node, padded):
"""
Version of ``get_text_positions()`` that doesn't use tokens.
"""
if sys.version_info[:2] < (3, 8): # pragma: no cover
# This is just for mypy
raise AssertionError("This method should only be called internally after checking supports_tokenless()")

if is_module(node):
# Modules don't have position info, so just return the range of the whole text.
# The token-using method does something different, but its behavior seems weird and inconsistent.
@@ -413,16 +403,14 @@ def get_text_positions(self, node, padded):
return self.asttokens.get_text_positions(node, padded)


# Node types that _get_text_positions_tokenless doesn't support. Only relevant for Python 3.8+.
_unsupported_tokenless_types = () # type: Tuple[str, ...]
if sys.version_info[:2] >= (3, 8):
# no lineno
_unsupported_tokenless_types += ("arguments", "Arguments", "withitem")
if sys.version_info[:2] == (3, 8):
# _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
_unsupported_tokenless_types += ("arg", "Starred")
# no lineno in 3.8
_unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")
# Node types that _get_text_positions_tokenless doesn't support.
# These initial values are missing lineno.
_unsupported_tokenless_types = ("arguments", "Arguments", "withitem") # type: Tuple[str, ...]
if sys.version_info[:2] == (3, 8):
# _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
_unsupported_tokenless_types += ("arg", "Starred")
# no lineno in 3.8
_unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")


def supports_tokenless(node=None):
@@ -434,7 +422,6 @@ def supports_tokenless(node=None):
The following cases are not supported:
- Python 3.7 and earlier
- PyPy
- ``ast.arguments`` / ``astroid.Arguments``
- ``ast.withitem``
@@ -459,6 +446,5 @@ def supports_tokenless(node=None):
)
)
)
and sys.version_info[:2] >= (3, 8)
and 'pypy' not in sys.version.lower()
)
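To illustrate the check implemented above, a minimal sketch (assumes asttokens is installed; the output commentary applies to CPython):

```python
import ast
from asttokens.asttokens import supports_tokenless

# supports_tokenless() reports whether a node's text can be recovered from
# its position attributes alone, without a token stream.
tree = ast.parse("def f(a, b=1):\n    return a + b\n")
for node in ast.walk(tree):
    print(type(node).__name__, supports_tokenless(node))
# 'arguments' prints False because it carries no lineno; most other node
# types, e.g. 'FunctionDef' and 'BinOp', print True on CPython 3.8+.
```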
47 changes: 14 additions & 33 deletions asttokens/mark_tokens.py
@@ -21,7 +21,6 @@

from . import util
from .asttokens import ASTTokens
from .util import AstConstant
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer

if TYPE_CHECKING:
@@ -177,13 +176,6 @@ def handle_comp(self, open_brace, node, first_token, last_token):
util.expect_token(before, token.OP, open_brace)
return (before, last_token)

# Python 3.8 fixed the starting position of list comprehensions:
# https://bugs.python.org/issue31241
if sys.version_info < (3, 8):
def visit_listcomp(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_comp('[', node, first_token, last_token)

def visit_comprehension(self,
node, # type: AstNode
first_token, # type: util.Token
@@ -296,26 +288,19 @@ def handle_bare_tuple(self, node, first_token, last_token):
last_token = maybe_comma
return (first_token, last_token)

if sys.version_info >= (3, 8):
# In Python3.8 parsed tuples include parentheses when present.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
# It's a bare tuple if the first token belongs to the first child. The first child may
# include extraneous parentheses (which don't create new nodes), so account for those too.
child = node.elts[0]
if TYPE_CHECKING:
child = cast(AstNode, child)
child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
if first_token == child_first:
return self.handle_bare_tuple(node, first_token, last_token)
return (first_token, last_token)
else:
# Before python 3.8, parsed tuples do not include parens.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
(first_token, last_token) = self.handle_bare_tuple(node, first_token, last_token)
return self._gobble_parens(first_token, last_token, False)
# In Python3.8 parsed tuples include parentheses when present.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
# It's a bare tuple if the first token belongs to the first child. The first child may
# include extraneous parentheses (which don't create new nodes), so account for those too.
child = node.elts[0]
if TYPE_CHECKING:
child = cast(AstNode, child)
child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
if first_token == child_first:
return self.handle_bare_tuple(node, first_token, last_token)
return (first_token, last_token)
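A small sketch of the behaviour handled above, separate from the changed file: on Python 3.8+ the reported text of a parenthesized tuple includes the parentheses, while a bare tuple's does not (assumes asttokens is installed):

```python
import ast
import asttokens

source = "a = (1, 2)\nb = 1, 2\n"
atok = asttokens.ASTTokens(source, parse=True)
tuples = [n for n in ast.walk(atok.tree) if isinstance(n, ast.Tuple)]
print([atok.get_text(t) for t in tuples])  # ['(1, 2)', '1, 2']
```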

def visit_tuple(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
@@ -417,19 +402,15 @@ def visit_num(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)

# In Astroid, the Num and Str nodes are replaced by Const.
def visit_const(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, AstConstant) or isinstance(node, nc.Const)
assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
if isinstance(node.value, numbers.Number):
return self.handle_num(node, node.value, first_token, last_token)
elif isinstance(node.value, (str, bytes)):
return self.visit_str(node, first_token, last_token)
return (first_token, last_token)

# In Python >= 3.6, there is a similar class 'Constant' for literals
# In 3.8 it became the type produced by ast.parse
# https://bugs.python.org/issue32892
visit_constant = visit_const

def visit_keyword(self, node, first_token, last_token):
183 changes: 88 additions & 95 deletions asttokens/util.py
@@ -20,10 +20,21 @@
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING

import astroid

from functools import lru_cache
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
Any,
TYPE_CHECKING,
Type,
)

if TYPE_CHECKING: # pragma: no cover
from .astroid_compat import NodeNG
@@ -67,13 +78,6 @@ def __str__(self):
return token_repr(self.type, self.string)


if sys.version_info >= (3, 6):
AstConstant = ast.Constant
else:
class AstConstant:
value = object()


def match_token(token, tok_type, tok_str=None):
# type: (Token, int, Optional[str]) -> bool
"""Returns true if token is of the given type and, if a string is given, has that string."""
@@ -91,22 +95,13 @@ def expect_token(token, tok_type, tok_str=None):
token_repr(tok_type, tok_str), str(token),
token.start[0], token.start[1] + 1))

# These were previously defined in tokenize.py and distinguishable by being greater than
# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly.
if sys.version_info >= (3, 7):
def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type in (token.NL, token.COMMENT, token.ENCODING)
else:
def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type >= token.N_TOKENS

def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type in (token.NL, token.COMMENT, token.ENCODING)
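For a quick illustration of what counts as non-coding, a standard-library-only sketch:

```python
import io
import token
import tokenize

source = "x = 1  # a comment\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    non_coding = tok.type in (token.NL, token.COMMENT, token.ENCODING)
    print(token.tok_name[tok.type], non_coding)
# COMMENT and NL are produced by the tokenizer but have no counterpart
# in the syntax tree, which is why they are skipped when marking tokens.
```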


def generate_tokens(text):
@@ -201,10 +196,19 @@ def is_expr_stmt(node):
return node.__class__.__name__ == 'Expr'



CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
try:
from astroid import Const
CONSTANT_CLASSES += (Const,)
except ImportError: # pragma: no cover
# astroid is not available
pass

def is_constant(node):
# type: (AstNode) -> bool
"""Returns whether node is a Constant node."""
return isinstance(node, (ast.Constant, astroid.Const))
return isinstance(node, CONSTANT_CLASSES)
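A brief sketch of the effect of the guarded import above (astroid is optional at runtime):

```python
import ast
from asttokens.util import is_constant

node = ast.parse("42").body[0].value  # an ast.Constant
print(is_constant(node))              # True whether or not astroid is installed

try:
    import astroid
except ImportError:
    pass
else:
    # With astroid available, its Const nodes are recognised as well.
    print(is_constant(astroid.extract_node("42")))  # True
```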


def is_ellipsis(node):
@@ -421,72 +425,61 @@ def last_stmt(node):
return node


if sys.version_info[:2] >= (3, 8):
from functools import lru_cache

@lru_cache(maxsize=None)
def fstring_positions_work():
# type: () -> bool
"""
The positions attached to nodes inside f-string FormattedValues have some bugs
that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
This checks for those bugs more concretely without relying on the Python version.
Specifically this checks:
- Values with a format spec or conversion
- Repeated (i.e. identical-looking) expressions
- f-strings implicitly concatenated over multiple lines.
- Multiline, triple-quoted f-strings.
"""
source = """(
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
f'''
{s} {t}
{u} {v}
'''
)"""
tree = ast.parse(source)
name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
positions_are_unique = len(set(name_positions)) == len(name_positions)
correct_source_segments = all(
ast.get_source_segment(source, node) == node.id
for node in name_nodes
)
return positions_are_unique and correct_source_segments
@lru_cache(maxsize=None)
def fstring_positions_work():
# type: () -> bool
"""
The positions attached to nodes inside f-string FormattedValues have some bugs
that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
This checks for those bugs more concretely without relying on the Python version.
Specifically this checks:
- Values with a format spec or conversion
- Repeated (i.e. identical-looking) expressions
- f-strings implicitly concatenated over multiple lines.
- Multiline, triple-quoted f-strings.
"""
source = """(
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
f'''
{s} {t}
{u} {v}
'''
)"""
tree = ast.parse(source)
name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
positions_are_unique = len(set(name_positions)) == len(name_positions)
correct_source_segments = all(
ast.get_source_segment(source, node) == node.id
for node in name_nodes
)
return positions_are_unique and correct_source_segments
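To make the probe in fstring_positions_work() concrete, a minimal standard-library sketch of the same idea:

```python
import ast

# Repeated expressions and a nested format spec, as in the probe above.
source = '(f"{b}{b} {c!r} {d:>{w}}")'
tree = ast.parse(source)
names = [n for n in ast.walk(tree) if isinstance(n, ast.Name)]
unique = len({(n.lineno, n.col_offset) for n in names}) == len(names)
segments_ok = all(ast.get_source_segment(source, n) == n.id for n in names)
print(unique and segments_ok)  # False on interpreters hit by the pre-3.9.7 bugs
```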

def annotate_fstring_nodes(tree):
# type: (ast.AST) -> None
"""
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree, include_joined_str=True):
if not isinstance(joinedstr, ast.JoinedStr):
continue
for part in joinedstr.values:
# The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
setattr(part, '_broken_positions', True) # use setattr for mypy

if isinstance(part, ast.FormattedValue):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
setattr(part.format_spec, '_broken_positions', True)

else:
def fstring_positions_work():
# type: () -> bool
return False

def annotate_fstring_nodes(_tree):
# type: (ast.AST) -> None
pass
def annotate_fstring_nodes(tree):
# type: (ast.AST) -> None
"""
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree, include_joined_str=True):
if not isinstance(joinedstr, ast.JoinedStr):
continue
for part in joinedstr.values:
# The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
setattr(part, '_broken_positions', True) # use setattr for mypy

if isinstance(part, ast.FormattedValue):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
setattr(part.format_spec, '_broken_positions', True)
