From a4e97c4f292deae5b8df7536a172e50187fa75e2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 19:17:48 -0500 Subject: [PATCH 01/17] feat: more frontend optimizations optimize get_descendants and get_children get rid of sort_nodes, we can guarantee ordering the old fashioned way optimize VyperNode.__hash__ --- vyper/ast/nodes.py | 58 +++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 0ebe18ab5d..3d2ded24ab 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -28,6 +28,7 @@ NODE_BASE_ATTRIBUTES = ( "_children", + "_descendants", "_depth", "_parent", "ast_type", @@ -211,17 +212,6 @@ def _node_filter(node, filters): return True -def _sort_nodes(node_iterable): - # sorting function for VyperNode.get_children - - def sortkey(key): - return float("inf") if key is None else key - - return sorted( - node_iterable, key=lambda k: (sortkey(k.lineno), sortkey(k.col_offset), k.node_id) - ) - - def _raise_syntax_exc(error_msg: str, ast_struct: dict) -> None: # helper function to raise a SyntaxException from a dict representing a node raise SyntaxException( @@ -257,6 +247,8 @@ class VyperNode: """ __slots__ = NODE_BASE_ATTRIBUTES + NODE_SRC_ATTRIBUTES + + _public_slots = [i for i in __slots__ if not i.startswith("_")] _only_empty_fields: tuple = () _translated_fields: dict = {} @@ -278,6 +270,7 @@ def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): self._children: set = set() self._metadata: NodeMetadata = NodeMetadata() self._original_node = None + self._descendants = None for field_name in NODE_SRC_ATTRIBUTES: # when a source offset is not available, use the parent's source offset @@ -338,7 +331,7 @@ def from_node(cls, node: "VyperNode", **kwargs) -> "VyperNode": ------- Vyper node instance """ - ast_struct = {i: getattr(node, i) for i in VyperNode.__slots__ if not i.startswith("_")} + ast_struct = {i: getattr(node, i) for i in VyperNode._public_slots} ast_struct.update(ast_type=cls.__name__, **kwargs) return cls(**ast_struct) @@ -355,7 +348,7 @@ def get_fields(cls) -> set: return set(i for i in slot_fields if not i.startswith("_")) def __hash__(self): - values = [getattr(self, i, None) for i in VyperNode.__slots__ if not i.startswith("_")] + values = [getattr(self, i, None) for i in VyperNode._public_slots] return hash(tuple(values)) def __deepcopy__(self, memo): @@ -537,7 +530,7 @@ def get_children( list Child nodes matching the filter conditions. """ - children = _sort_nodes(self._children) + children = self._children.copy() if node_type is not None: children = [i for i in children if isinstance(i, node_type)] if reverse: @@ -589,19 +582,32 @@ def get_descendants( list Descendant nodes matching the filter conditions. """ - children = self.get_children(node_type, filters) - for node in self.get_children(): - children.extend(node.get_descendants(node_type, filters)) - if ( - include_self - and (not node_type or isinstance(self, node_type)) - and _node_filter(self, filters) - ): - children.append(self) - result = _sort_nodes(children) + ret = self._get_descendants() + + if not include_self: + ret.pop(0) # pop self + + if node_type: + ret = [node for node in ret if isinstance(node, node_type)] + + if filters is not None: + ret = [node for node in ret if _node_filter(node, filters)] + if reverse: - result.reverse() - return result + ret.reverse() + return ret + + def _get_descendants(self): + if self._descendants is not None: + return self._descendants + + ret = [self] + ret.extend(self._children) + for node in self._children: + ret.extend(node._get_descendants()) + + self._descendants = ret + return ret def get(self, field_str: str) -> Any: """ From d3c5d0a6e73ac3ebdc887cb466c965744f9bc035 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 22:57:44 -0500 Subject: [PATCH 02/17] optimise VyperNode.__init__ --- vyper/ast/nodes.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 3d2ded24ab..161cdb9623 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -274,15 +274,12 @@ def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): for field_name in NODE_SRC_ATTRIBUTES: # when a source offset is not available, use the parent's source offset - value = kwargs.get(field_name) - if kwargs.get(field_name) is None: + value = kwargs.pop(field_name, None) + if value is None: value = getattr(parent, field_name, None) setattr(self, field_name, value) for field_name, value in kwargs.items(): - if field_name in NODE_SRC_ATTRIBUTES: - continue - if field_name in self._translated_fields: field_name = self._translated_fields[field_name] From d4510b448f5545bd918fd8837f337a05ec0eebf9 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 23:13:04 -0500 Subject: [PATCH 03/17] optimize another hotspot --- vyper/semantics/types/primitives.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vyper/semantics/types/primitives.py b/vyper/semantics/types/primitives.py index d383f72ab2..f0001b9191 100644 --- a/vyper/semantics/types/primitives.py +++ b/vyper/semantics/types/primitives.py @@ -251,11 +251,12 @@ def abi_type(self) -> ABIType: return ABI_GIntM(self.bits, self.is_signed) def compare_type(self, other: VyperType) -> bool: - if not super().compare_type(other): - return False - assert isinstance(other, IntegerT) # mypy - - return self.is_signed == other.is_signed and self.bits == other.bits + # hotspot + return ( # noqa: E721 + type(self) == type(other) + and self.is_signed == other.is_signed # type: ignore + and self.bits == other.bits # type: ignore + ) # helper function for readability. From 59466a85f3df25df42abe656e044a7cc837146c6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 23:15:55 -0500 Subject: [PATCH 04/17] use list for children --- vyper/ast/nodes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 161cdb9623..d95ebdda2b 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -267,7 +267,7 @@ def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): Dictionary of fields to be included within the node. """ self.set_parent(parent) - self._children: set = set() + self._children: list = [] self._metadata: NodeMetadata = NodeMetadata() self._original_node = None self._descendants = None @@ -299,7 +299,7 @@ def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): # add to children of parent last to ensure an accurate hash is generated if parent is not None: - parent._children.add(self) + parent._children.append(self) # set parent, can be useful when inserting copied nodes into the AST def set_parent(self, parent: "VyperNode"): @@ -672,7 +672,7 @@ def add_to_body(self, node: VyperNode) -> None: self.body.append(node) node._depth = self._depth + 1 node._parent = self - self._children.add(node) + self._children.append(node) def remove_from_body(self, node: VyperNode) -> None: """ From b181b27f5c08a9dd46f76bbea9289115daf74637 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 23:20:34 -0500 Subject: [PATCH 05/17] use self.__class__ instead of type(self) --- vyper/semantics/types/primitives.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/semantics/types/primitives.py b/vyper/semantics/types/primitives.py index f0001b9191..dd3e4462e7 100644 --- a/vyper/semantics/types/primitives.py +++ b/vyper/semantics/types/primitives.py @@ -253,7 +253,7 @@ def abi_type(self) -> ABIType: def compare_type(self, other: VyperType) -> bool: # hotspot return ( # noqa: E721 - type(self) == type(other) + self.__class__ == other.__class__ and self.is_signed == other.is_signed # type: ignore and self.bits == other.bits # type: ignore ) From 8aad8d68f084d1811a940aa70df08071cbd42ebc Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 23:25:53 -0500 Subject: [PATCH 06/17] fix get_descendants --- vyper/ast/nodes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index d95ebdda2b..40b2df5cf1 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -581,8 +581,10 @@ def get_descendants( """ ret = self._get_descendants() - if not include_self: - ret.pop(0) # pop self + if include_self: + ret = ret.copy() + else: + ret = ret[1:] # pop self if node_type: ret = [node for node in ret if isinstance(node, node_type)] From b53c3824633031f99a1e883511656972627059b6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 16 Feb 2024 23:28:12 -0500 Subject: [PATCH 07/17] rename a variable --- vyper/ast/nodes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 40b2df5cf1..0acecf45d5 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -270,7 +270,7 @@ def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): self._children: list = [] self._metadata: NodeMetadata = NodeMetadata() self._original_node = None - self._descendants = None + self._cache_descendants = None for field_name in NODE_SRC_ATTRIBUTES: # when a source offset is not available, use the parent's source offset @@ -597,15 +597,15 @@ def get_descendants( return ret def _get_descendants(self): - if self._descendants is not None: - return self._descendants + if self._cache_descendants is not None: + return self._cache_descendants ret = [self] ret.extend(self._children) for node in self._children: ret.extend(node._get_descendants()) - self._descendants = ret + self._cache_descendants = ret return ret def get(self, field_str: str) -> Any: From 22e250351adb59e2d4d13adec6f4c42f1a836f84 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 10:33:41 -0500 Subject: [PATCH 08/17] fix slot name --- vyper/ast/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 0acecf45d5..9a6447ce2f 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -28,13 +28,13 @@ NODE_BASE_ATTRIBUTES = ( "_children", - "_descendants", "_depth", "_parent", "ast_type", "node_id", "_metadata", "_original_node", + "_cache_descendants", ) NODE_SRC_ATTRIBUTES = ( "col_offset", From 9a8c34a5bea2a17f04a934460c05174831122fc4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 12:15:14 -0500 Subject: [PATCH 09/17] use iterators for get_descendants --- vyper/ast/nodes.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 9a6447ce2f..68e46d77ce 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -581,32 +581,34 @@ def get_descendants( """ ret = self._get_descendants() - if include_self: - ret = ret.copy() - else: - ret = ret[1:] # pop self + if not include_self: + # pop front + s = next(ret) + assert s == self if node_type: - ret = [node for node in ret if isinstance(node, node_type)] + ret = (node for node in ret if isinstance(node, node_type)) if filters is not None: - ret = [node for node in ret if _node_filter(node, filters)] + ret = (node for node in ret if _node_filter(node, filters)) + + ret = list(ret) if reverse: ret.reverse() + return ret def _get_descendants(self): - if self._cache_descendants is not None: - return self._cache_descendants + if self._cache_descendants is None: + ret = [self] + ret.extend(self._children) + for node in self._children: + ret.extend(node._get_descendants()) - ret = [self] - ret.extend(self._children) - for node in self._children: - ret.extend(node._get_descendants()) + self._cache_descendants = ret - self._cache_descendants = ret - return ret + return iter(self._cache_descendants) def get(self, field_str: str) -> Any: """ From a862fc943f4bfe67ddf7afd37359536421992720 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 12:31:31 -0500 Subject: [PATCH 10/17] remove duplicates in get_descendants --- vyper/ast/nodes.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 68e46d77ce..27043d2a86 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -579,12 +579,7 @@ def get_descendants( list Descendant nodes matching the filter conditions. """ - ret = self._get_descendants() - - if not include_self: - # pop front - s = next(ret) - assert s == self + ret = self._get_descendants(include_self) if node_type: ret = (node for node in ret if isinstance(node, node_type)) @@ -599,16 +594,22 @@ def get_descendants( return ret - def _get_descendants(self): + def _get_descendants(self, include_self=True): + # get descendants in reverse topsort (i.e. breadth-first) order if self._cache_descendants is None: ret = [self] ret.extend(self._children) for node in self._children: - ret.extend(node._get_descendants()) + ret.extend(node._get_descendants(include_self=False)) self._cache_descendants = ret - return iter(self._cache_descendants) + ret = iter(self._cache_descendants) + if not include_self: + s = next(ret) # pop + assert s is self + + return ret def get(self, field_str: str) -> Any: """ From 451712c5297ca56f8fee29f65f603bbf638c5385 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 12:34:19 -0500 Subject: [PATCH 11/17] rewrite get_children with iterators --- vyper/ast/nodes.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 27043d2a86..ec76eb9d31 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -527,14 +527,17 @@ def get_children( list Child nodes matching the filter conditions. """ - children = self._children.copy() + children = iter(self._children) + if node_type is not None: - children = [i for i in children if isinstance(i, node_type)] + children = (i for i in children if isinstance(i, node_type)) + if filters is not None: + children = (i for i in children if _node_filter(i, filters)) + + children = list(children) if reverse: children.reverse() - if filters is None: - return children - return [i for i in children if _node_filter(i, filters)] + return children def get_descendants( self, From 06402ae874a4b83f918729962ff8ff9a4365ac68 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 12:42:16 -0500 Subject: [PATCH 12/17] factor out some common code --- vyper/ast/nodes.py | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index ec76eb9d31..7fa0e2ce90 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -212,6 +212,20 @@ def _node_filter(node, filters): return True +def _apply_filters(node_iter, node_type, filters, reverse): + ret = node_iter + if node_type is not None: + ret = (i for i in ret if isinstance(i, node_type)) + if filters is not None: + ret = (i for i in ret if _node_filter(i, filters)) + + ret = list(ret) + if reverse: + ret.reverse() + return ret + + + def _raise_syntax_exc(error_msg: str, ast_struct: dict) -> None: # helper function to raise a SyntaxException from a dict representing a node raise SyntaxException( @@ -253,6 +267,7 @@ class VyperNode: _translated_fields: dict = {} def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict): + # this function is performance-sensitive """ AST node initializer method. @@ -527,17 +542,7 @@ def get_children( list Child nodes matching the filter conditions. """ - children = iter(self._children) - - if node_type is not None: - children = (i for i in children if isinstance(i, node_type)) - if filters is not None: - children = (i for i in children if _node_filter(i, filters)) - - children = list(children) - if reverse: - children.reverse() - return children + return _apply_filters(iter(self._children), node_type, filters, reverse) def get_descendants( self, @@ -546,6 +551,7 @@ def get_descendants( include_self: bool = False, reverse: bool = False, ) -> list: + # this function is performance-sensitive """ Return a list of descendant nodes of this node which match the given filter(s). @@ -583,22 +589,10 @@ def get_descendants( Descendant nodes matching the filter conditions. """ ret = self._get_descendants(include_self) - - if node_type: - ret = (node for node in ret if isinstance(node, node_type)) - - if filters is not None: - ret = (node for node in ret if _node_filter(node, filters)) - - ret = list(ret) - - if reverse: - ret.reverse() - - return ret + return _apply_filters(ret, node_type, filters, reverse) def _get_descendants(self, include_self=True): - # get descendants in reverse topsort (i.e. breadth-first) order + # get descendants in breadth-first order if self._cache_descendants is None: ret = [self] ret.extend(self._children) From 836dda125883485beabe6d72cc21a42b96bd5a8c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 12:43:42 -0500 Subject: [PATCH 13/17] add some performance notes --- vyper/ast/nodes.py | 1 + vyper/semantics/analysis/utils.py | 1 + vyper/semantics/types/primitives.py | 7 ++++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 7fa0e2ce90..54ceb2ded2 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -364,6 +364,7 @@ def __hash__(self): return hash(tuple(values)) def __deepcopy__(self, memo): + # default implementation of deepcopy is a hotspot return pickle.loads(pickle.dumps(self)) def __eq__(self, other): diff --git a/vyper/semantics/analysis/utils.py b/vyper/semantics/analysis/utils.py index abea600d88..21ca7a8d3f 100644 --- a/vyper/semantics/analysis/utils.py +++ b/vyper/semantics/analysis/utils.py @@ -480,6 +480,7 @@ def get_expr_info(node: vy_ast.ExprNode, is_callable: bool = False) -> ExprInfo: def get_common_types(*nodes: vy_ast.VyperNode, filter_fn: Callable = None) -> List: + # this function is a performance hotspot """ Return a list of common possible types between one or more nodes. diff --git a/vyper/semantics/types/primitives.py b/vyper/semantics/types/primitives.py index dd3e4462e7..d11a9595a3 100644 --- a/vyper/semantics/types/primitives.py +++ b/vyper/semantics/types/primitives.py @@ -251,7 +251,12 @@ def abi_type(self) -> ABIType: return ABI_GIntM(self.bits, self.is_signed) def compare_type(self, other: VyperType) -> bool: - # hotspot + # this function is performance sensitive + # originally: + # if not super().compare_type(other): + # return False + # return self.is_signed == other.is_signed and self.bits == other.bits + return ( # noqa: E721 self.__class__ == other.__class__ and self.is_signed == other.is_signed # type: ignore From f815c11d63d16e4d5291d5ea9840fbe1fe4f005c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 18:46:26 -0500 Subject: [PATCH 14/17] switch from breadth first to topsort --- vyper/ast/nodes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 54ceb2ded2..0ed065c435 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -593,16 +593,16 @@ def get_descendants( return _apply_filters(ret, node_type, filters, reverse) def _get_descendants(self, include_self=True): - # get descendants in breadth-first order + # get descendants in topsort order if self._cache_descendants is None: ret = [self] - ret.extend(self._children) for node in self._children: - ret.extend(node._get_descendants(include_self=False)) + ret.extend(node._get_descendants()) self._cache_descendants = ret ret = iter(self._cache_descendants) + if not include_self: s = next(ret) # pop assert s is self From c7b3776274bc48ac3dbbce12f5806a9d441873b6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Feb 2024 18:47:04 -0500 Subject: [PATCH 15/17] fix lint --- vyper/ast/nodes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 0ed065c435..3e15a28512 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -225,7 +225,6 @@ def _apply_filters(node_iter, node_type, filters, reverse): return ret - def _raise_syntax_exc(error_msg: str, ast_struct: dict) -> None: # helper function to raise a SyntaxException from a dict representing a node raise SyntaxException( From bbbb76eb2b8b362b255acbaab8f3712b7ccebe07 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Feb 2024 14:28:31 -0500 Subject: [PATCH 16/17] optimize tokenization --- vyper/ast/parse.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/vyper/ast/parse.py b/vyper/ast/parse.py index a10a840da0..607174a05b 100644 --- a/vyper/ast/parse.py +++ b/vyper/ast/parse.py @@ -1,6 +1,8 @@ import ast as python_ast +import io import tokenize from decimal import Decimal +from functools import cached_property from typing import Any, Dict, List, Optional, Union, cast import asttokens @@ -266,6 +268,12 @@ def visit_ClassDef(self, node): node.ast_type = self._modification_offsets[(node.lineno, node.col_offset)] return node + @cached_property + def _dummy_tokens(self): + bytez = "dummy_target:\\\n foo".encode("utf-8") + token_list = list(tokenize.tokenize(io.BytesIO(bytez).readline))[:3] + return token_list + def visit_For(self, node): """ Visit a For node, splicing in the loop variable annotation provided by @@ -300,8 +308,19 @@ def visit_For(self, node): # in a bit, but for now lets us keep the line/col offset, and # *also* gives us a valid AST. it doesn't matter what the dummy # target name is, since it gets removed in a few lines. + + # tokenization is a perf hotspot, so we manually construct the token + # list to pass to ASTTokens. + annotation_tokens = self._dummy_tokens + annotation_tokens + + # ensure tokens are properly terminated + endline = annotation_tokens[-1].start[0] + annotation_tokens.append( + tokenize.TokenInfo( + type=tokenize.ENDMARKER, string="", start=(endline, 0), end=(endline, 0), line="" + ) + ) annotation_str = tokenize.untokenize(annotation_tokens) - annotation_str = "dummy_target:" + annotation_str try: fake_node = python_ast.parse(annotation_str).body[0] @@ -310,10 +329,8 @@ def visit_For(self, node): "invalid type annotation", self._source_code, node.lineno, node.col_offset ) from e - # fill in with asttokens info. note we can use `self._tokens` because - # it is indented to exactly the same position where it appeared - # in the original source! - self._tokens.mark_tokens(fake_node) + # fill in with asttokens info. + asttokens.ASTTokens(annotation_str, tree=fake_node, tokens=annotation_tokens) # replace the dummy target name with the real target name. fake_node.target = node.target From 01ec986bd53db8d20e7d5cba00da835417ab1815 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Feb 2024 14:48:44 -0500 Subject: [PATCH 17/17] Revert "optimize tokenization" it was not a good idea! This reverts commit bbbb76eb2b8b362b255acbaab8f3712b7ccebe07. --- vyper/ast/parse.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/vyper/ast/parse.py b/vyper/ast/parse.py index 607174a05b..a10a840da0 100644 --- a/vyper/ast/parse.py +++ b/vyper/ast/parse.py @@ -1,8 +1,6 @@ import ast as python_ast -import io import tokenize from decimal import Decimal -from functools import cached_property from typing import Any, Dict, List, Optional, Union, cast import asttokens @@ -268,12 +266,6 @@ def visit_ClassDef(self, node): node.ast_type = self._modification_offsets[(node.lineno, node.col_offset)] return node - @cached_property - def _dummy_tokens(self): - bytez = "dummy_target:\\\n foo".encode("utf-8") - token_list = list(tokenize.tokenize(io.BytesIO(bytez).readline))[:3] - return token_list - def visit_For(self, node): """ Visit a For node, splicing in the loop variable annotation provided by @@ -308,19 +300,8 @@ def visit_For(self, node): # in a bit, but for now lets us keep the line/col offset, and # *also* gives us a valid AST. it doesn't matter what the dummy # target name is, since it gets removed in a few lines. - - # tokenization is a perf hotspot, so we manually construct the token - # list to pass to ASTTokens. - annotation_tokens = self._dummy_tokens + annotation_tokens - - # ensure tokens are properly terminated - endline = annotation_tokens[-1].start[0] - annotation_tokens.append( - tokenize.TokenInfo( - type=tokenize.ENDMARKER, string="", start=(endline, 0), end=(endline, 0), line="" - ) - ) annotation_str = tokenize.untokenize(annotation_tokens) + annotation_str = "dummy_target:" + annotation_str try: fake_node = python_ast.parse(annotation_str).body[0] @@ -329,8 +310,10 @@ def visit_For(self, node): "invalid type annotation", self._source_code, node.lineno, node.col_offset ) from e - # fill in with asttokens info. - asttokens.ASTTokens(annotation_str, tree=fake_node, tokens=annotation_tokens) + # fill in with asttokens info. note we can use `self._tokens` because + # it is indented to exactly the same position where it appeared + # in the original source! + self._tokens.mark_tokens(fake_node) # replace the dummy target name with the real target name. fake_node.target = node.target