From a7d851b5b360db15b0eb3aed9ecea41c28055b18 Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Wed, 28 Aug 2024 03:16:51 +0200 Subject: [PATCH 1/7] ignore Tap from class variables parsing --- src/tap/tap.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/tap/tap.py b/src/tap/tap.py index 09a5134..79f6147 100644 --- a/src/tap/tap.py +++ b/src/tap/tap.py @@ -483,7 +483,9 @@ def parse_args( return self @classmethod - def _get_from_self_and_super(cls, extract_func: Callable[[type], dict]) -> Union[Dict[str, Any], Dict]: + def _get_from_self_and_super( + cls, extract_func: Callable[[type], dict], *, need_tap_cls: bool = True, + ) -> Union[Dict[str, Any], Dict]: """Returns a dictionary mapping variable names to values. Variables and values are extracted from classes using key starting @@ -494,6 +496,7 @@ def _get_from_self_and_super(cls, extract_func: Callable[[type], dict]) -> Union Super classes are traversed through breadth first search. :param extract_func: A function that extracts from a class a dictionary mapping variables to values. + :param need_tap_cls: If False, variables from the Tap and ArgumentParser classes are ignored. :return: A dictionary mapping variable names to values from the class dict. """ visited = set() @@ -503,6 +506,9 @@ def _get_from_self_and_super(cls, extract_func: Callable[[type], dict]) -> Union while len(super_classes) > 0: super_class = super_classes.pop(0) + if not need_tap_cls and super_class is Tap: + break + if super_class not in visited and issubclass(super_class, Tap): super_dictionary = extract_func(super_class) @@ -521,7 +527,8 @@ def _get_from_self_and_super(cls, extract_func: Callable[[type], dict]) -> Union def _get_class_dict(self) -> Dict[str, Any]: """Returns a dictionary mapping class variable names to values from the class dict.""" class_dict = self._get_from_self_and_super( - extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())) + extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())), + need_tap_cls=False, ) class_dict = { var: val @@ -529,9 +536,7 @@ def _get_class_dict(self) -> Dict[str, Any]: if not ( var.startswith("_") or callable(val) - or isinstance(val, staticmethod) - or isinstance(val, classmethod) - or isinstance(val, property) + or isinstance(val, (staticmethod, classmethod, property)) ) } @@ -539,7 +544,10 @@ def _get_class_dict(self) -> Dict[str, Any]: def _get_annotations(self) -> Dict[str, Any]: """Returns a dictionary mapping variable names to their type annotations.""" - return self._get_from_self_and_super(extract_func=lambda super_class: dict(get_type_hints(super_class))) + return self._get_from_self_and_super( + extract_func=lambda super_class: dict(get_type_hints(super_class)), + need_tap_cls=False, + ) def _get_class_variables(self) -> dict: """Returns a dictionary mapping class variables names to their additional information.""" @@ -547,7 +555,7 @@ def _get_class_variables(self) -> dict: try: class_variables = self._get_from_self_and_super( - extract_func=lambda super_class: get_class_variables(super_class) + extract_func=get_class_variables, need_tap_cls=False, ) # Handle edge-case of source code modification while code is running @@ -588,7 +596,8 @@ def as_dict(self) -> Dict[str, Any]: self_dict = self.__dict__ class_dict = self._get_from_self_and_super( - extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())) + extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())), + need_tap_cls=False, ) class_dict = {key: val for key, val in class_dict.items() if key not in self_dict} stored_dict = {**self_dict, **class_dict} From 03c9dc4d71ded3f1c930ebd8d3f8a84e32286eca Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Wed, 28 Aug 2024 03:30:45 +0200 Subject: [PATCH 2/7] only calls inspect.getsource and tokenize.generated tokens once --- src/tap/utils.py | 59 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/src/tap/utils.py b/src/tap/utils.py index 9f22c34..872cc1f 100644 --- a/src/tap/utils.py +++ b/src/tap/utils.py @@ -18,6 +18,7 @@ Callable, Dict, Generator, + Iterable, Iterator, List, Literal, @@ -184,17 +185,20 @@ def is_positional_arg(*name_or_flags) -> bool: return not is_option_arg(*name_or_flags) +def _tokenize_source(source: str) -> Generator[tokenize.TokenInfo, None, None]: + """Returns a generator for the tokens of the object's source code, given the source code.""" + return tokenize.generate_tokens(StringIO(source).readline) + + def tokenize_source(obj: object) -> Generator: """Returns a generator for the tokens of the object's source code.""" - source = inspect.getsource(obj) - token_generator = tokenize.generate_tokens(StringIO(source).readline) - return token_generator + return _tokenize_source(inspect.getsource(obj)) -def get_class_column(obj: type) -> int: - """Determines the column number for class variables in a class.""" +def _get_class_column(tokens: Iterable[tokenize.TokenInfo]) -> int: + """Determines the column number for class variables in a class, given the tokens of the class.""" first_line = 1 - for token_type, token, (start_line, start_column), (end_line, end_column), line in tokenize_source(obj): + for token_type, token, (start_line, start_column), (end_line, end_column), line in tokens: if token.strip() == "@": first_line += 1 if start_line <= first_line or token.strip() == "": @@ -203,10 +207,18 @@ def get_class_column(obj: type) -> int: return start_column -def source_line_to_tokens(obj: object) -> Dict[int, List[Dict[str, Union[str, int]]]]: - """Gets a dictionary mapping from line number to a dictionary of tokens on that line for an object's source code.""" +def get_class_column(cls: type) -> int: + """Determines the column number for class variables in a class.""" + return _get_class_column(tokenize_source(cls)) + + +def _source_line_to_tokens(tokens: Iterable[tokenize.TokenInfo]) -> Dict[int, List[Dict[str, Union[str, int]]]]: + """ + Gets a dictionary mapping from line number to a dictionary of tokens on that line for an object's source code, + given the tokens of the object's source code. + """ line_to_tokens = {} - for token_type, token, (start_line, start_column), (end_line, end_column), line in tokenize_source(obj): + for token_type, token, (start_line, start_column), (end_line, end_column), line in tokens: line_to_tokens.setdefault(start_line, []).append({ 'token_type': token_type, 'token': token, @@ -220,13 +232,19 @@ def source_line_to_tokens(obj: object) -> Dict[int, List[Dict[str, Union[str, in return line_to_tokens -def get_subsequent_assign_lines(cls: type) -> Set[int]: - """For all multiline assign statements, get the line numbers after the first line of the assignment.""" - # Get source code of class - source = inspect.getsource(cls) +def source_line_to_tokens(obj: object) -> Dict[int, List[Dict[str, Union[str, int]]]]: + """Gets a dictionary mapping from line number to a dictionary of tokens on that line for an object's source code.""" + return _source_line_to_tokens(tokenize_source(obj)) + + +def _get_subsequent_assign_lines(source_cls: str) -> Set[int]: + """ + For all multiline assign statements, get the line numbers after the first line of the assignment, + given the source code of the object. + """ # Parse source code using ast (with an if statement to avoid indentation errors) - source = f"if True:\n{textwrap.indent(source, ' ')}" + source = f"if True:\n{textwrap.indent(source_cls, ' ')}" body = ast.parse(source).body[0] # Set up warning message @@ -265,18 +283,25 @@ def get_subsequent_assign_lines(cls: type) -> Set[int]: return assign_lines +def get_subsequent_assign_lines(cls: type) -> Set[int]: + """For all multiline assign statements, get the line numbers after the first line of the assignment.""" + return _get_subsequent_assign_lines(inspect.getsource(cls)) def get_class_variables(cls: type) -> Dict[str, Dict[str, str]]: """Returns a dictionary mapping class variables to their additional information (currently just comments).""" + # Get the source code and tokens of the class + source_cls = inspect.getsource(cls) + tokens = tuple(_tokenize_source(source_cls)) + # Get mapping from line number to tokens - line_to_tokens = source_line_to_tokens(cls) + line_to_tokens = _source_line_to_tokens(tokens) # Get class variable column number - class_variable_column = get_class_column(cls) + class_variable_column = _get_class_column(tokens) # For all multiline assign statements, get the line numbers after the first line of the assignment # This is used to avoid identifying comments in multiline assign statements - subsequent_assign_lines = get_subsequent_assign_lines(cls) + subsequent_assign_lines = _get_subsequent_assign_lines(source_cls) # Extract class variables class_variable = None From 96182f8b147c235aa7fd1df0959d68c8bcf803d7 Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Wed, 28 Aug 2024 03:49:55 +0200 Subject: [PATCH 3/7] remove the need_tap argument since it is not used --- src/tap/tap.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/tap/tap.py b/src/tap/tap.py index 79f6147..c925c6e 100644 --- a/src/tap/tap.py +++ b/src/tap/tap.py @@ -483,9 +483,7 @@ def parse_args( return self @classmethod - def _get_from_self_and_super( - cls, extract_func: Callable[[type], dict], *, need_tap_cls: bool = True, - ) -> Union[Dict[str, Any], Dict]: + def _get_from_self_and_super( cls, extract_func: Callable[[type], dict]) -> Union[Dict[str, Any], Dict]: """Returns a dictionary mapping variable names to values. Variables and values are extracted from classes using key starting @@ -496,7 +494,6 @@ def _get_from_self_and_super( Super classes are traversed through breadth first search. :param extract_func: A function that extracts from a class a dictionary mapping variables to values. - :param need_tap_cls: If False, variables from the Tap and ArgumentParser classes are ignored. :return: A dictionary mapping variable names to values from the class dict. """ visited = set() @@ -506,10 +503,7 @@ def _get_from_self_and_super( while len(super_classes) > 0: super_class = super_classes.pop(0) - if not need_tap_cls and super_class is Tap: - break - - if super_class not in visited and issubclass(super_class, Tap): + if super_class not in visited and issubclass(super_class, Tap) and super_class is not Tap: super_dictionary = extract_func(super_class) # Update only unseen variables to avoid overriding subclass values @@ -527,8 +521,7 @@ def _get_from_self_and_super( def _get_class_dict(self) -> Dict[str, Any]: """Returns a dictionary mapping class variable names to values from the class dict.""" class_dict = self._get_from_self_and_super( - extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())), - need_tap_cls=False, + extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())) ) class_dict = { var: val @@ -545,8 +538,7 @@ def _get_class_dict(self) -> Dict[str, Any]: def _get_annotations(self) -> Dict[str, Any]: """Returns a dictionary mapping variable names to their type annotations.""" return self._get_from_self_and_super( - extract_func=lambda super_class: dict(get_type_hints(super_class)), - need_tap_cls=False, + extract_func=lambda super_class: dict(get_type_hints(super_class)) ) def _get_class_variables(self) -> dict: @@ -554,9 +546,7 @@ def _get_class_variables(self) -> dict: class_variable_names = {**self._get_annotations(), **self._get_class_dict()}.keys() try: - class_variables = self._get_from_self_and_super( - extract_func=get_class_variables, need_tap_cls=False, - ) + class_variables = self._get_from_self_and_super(extract_func=get_class_variables) # Handle edge-case of source code modification while code is running variables_to_add = class_variable_names - class_variables.keys() @@ -597,7 +587,6 @@ def as_dict(self) -> Dict[str, Any]: self_dict = self.__dict__ class_dict = self._get_from_self_and_super( extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())), - need_tap_cls=False, ) class_dict = {key: val for key, val in class_dict.items() if key not in self_dict} stored_dict = {**self_dict, **class_dict} From a9f931572f6ae568b45677b53dffb21c6c2ed141 Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Wed, 28 Aug 2024 03:55:03 +0200 Subject: [PATCH 4/7] formatting --- src/tap/tap.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/tap/tap.py b/src/tap/tap.py index c925c6e..c2460a7 100644 --- a/src/tap/tap.py +++ b/src/tap/tap.py @@ -483,7 +483,7 @@ def parse_args( return self @classmethod - def _get_from_self_and_super( cls, extract_func: Callable[[type], dict]) -> Union[Dict[str, Any], Dict]: + def _get_from_self_and_super(cls, extract_func: Callable[[type], dict]) -> Union[Dict[str, Any], Dict]: """Returns a dictionary mapping variable names to values. Variables and values are extracted from classes using key starting @@ -537,9 +537,7 @@ def _get_class_dict(self) -> Dict[str, Any]: def _get_annotations(self) -> Dict[str, Any]: """Returns a dictionary mapping variable names to their type annotations.""" - return self._get_from_self_and_super( - extract_func=lambda super_class: dict(get_type_hints(super_class)) - ) + return self._get_from_self_and_super(extract_func=lambda super_class: dict(get_type_hints(super_class))) def _get_class_variables(self) -> dict: """Returns a dictionary mapping class variables names to their additional information.""" @@ -586,7 +584,7 @@ def as_dict(self) -> Dict[str, Any]: self_dict = self.__dict__ class_dict = self._get_from_self_and_super( - extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())), + extract_func=lambda super_class: dict(getattr(super_class, "__dict__", dict())) ) class_dict = {key: val for key, val in class_dict.items() if key not in self_dict} stored_dict = {**self_dict, **class_dict} From d3b1874c18c37e841755fee97bbaf888df23c5ba Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Thu, 29 Aug 2024 19:53:28 +0200 Subject: [PATCH 5/7] Remove unused functions, adjust the tests for the new ones --- src/tap/utils.py | 35 +++++++++-------------------------- tests/test_utils.py | 20 ++++++++++++++------ 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/src/tap/utils.py b/src/tap/utils.py index 872cc1f..793a3e0 100644 --- a/src/tap/utils.py +++ b/src/tap/utils.py @@ -185,17 +185,12 @@ def is_positional_arg(*name_or_flags) -> bool: return not is_option_arg(*name_or_flags) -def _tokenize_source(source: str) -> Generator[tokenize.TokenInfo, None, None]: +def tokenize_source(source: str) -> Generator[tokenize.TokenInfo, None, None]: """Returns a generator for the tokens of the object's source code, given the source code.""" return tokenize.generate_tokens(StringIO(source).readline) -def tokenize_source(obj: object) -> Generator: - """Returns a generator for the tokens of the object's source code.""" - return _tokenize_source(inspect.getsource(obj)) - - -def _get_class_column(tokens: Iterable[tokenize.TokenInfo]) -> int: +def get_class_column(tokens: Iterable[tokenize.TokenInfo]) -> int: """Determines the column number for class variables in a class, given the tokens of the class.""" first_line = 1 for token_type, token, (start_line, start_column), (end_line, end_column), line in tokens: @@ -205,14 +200,10 @@ def _get_class_column(tokens: Iterable[tokenize.TokenInfo]) -> int: continue return start_column + raise ValueError("Could not find any class variables in the class.") -def get_class_column(cls: type) -> int: - """Determines the column number for class variables in a class.""" - return _get_class_column(tokenize_source(cls)) - - -def _source_line_to_tokens(tokens: Iterable[tokenize.TokenInfo]) -> Dict[int, List[Dict[str, Union[str, int]]]]: +def source_line_to_tokens(tokens: Iterable[tokenize.TokenInfo]) -> Dict[int, List[Dict[str, Union[str, int]]]]: """ Gets a dictionary mapping from line number to a dictionary of tokens on that line for an object's source code, given the tokens of the object's source code. @@ -232,12 +223,7 @@ def _source_line_to_tokens(tokens: Iterable[tokenize.TokenInfo]) -> Dict[int, Li return line_to_tokens -def source_line_to_tokens(obj: object) -> Dict[int, List[Dict[str, Union[str, int]]]]: - """Gets a dictionary mapping from line number to a dictionary of tokens on that line for an object's source code.""" - return _source_line_to_tokens(tokenize_source(obj)) - - -def _get_subsequent_assign_lines(source_cls: str) -> Set[int]: +def get_subsequent_assign_lines(source_cls: str) -> Set[int]: """ For all multiline assign statements, get the line numbers after the first line of the assignment, given the source code of the object. @@ -283,25 +269,22 @@ def _get_subsequent_assign_lines(source_cls: str) -> Set[int]: return assign_lines -def get_subsequent_assign_lines(cls: type) -> Set[int]: - """For all multiline assign statements, get the line numbers after the first line of the assignment.""" - return _get_subsequent_assign_lines(inspect.getsource(cls)) def get_class_variables(cls: type) -> Dict[str, Dict[str, str]]: """Returns a dictionary mapping class variables to their additional information (currently just comments).""" # Get the source code and tokens of the class source_cls = inspect.getsource(cls) - tokens = tuple(_tokenize_source(source_cls)) + tokens = tuple(tokenize_source(source_cls)) # Get mapping from line number to tokens - line_to_tokens = _source_line_to_tokens(tokens) + line_to_tokens = source_line_to_tokens(tokens) # Get class variable column number - class_variable_column = _get_class_column(tokens) + class_variable_column = get_class_column(tokens) # For all multiline assign statements, get the line numbers after the first line of the assignment # This is used to avoid identifying comments in multiline assign statements - subsequent_assign_lines = _get_subsequent_assign_lines(source_cls) + subsequent_assign_lines = get_subsequent_assign_lines(source_cls) # Extract class variables class_variable = None diff --git a/tests/test_utils.py b/tests/test_utils.py index 33729ca..c349b49 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ from argparse import ArgumentTypeError +import inspect import json import os import subprocess @@ -11,6 +12,7 @@ get_class_column, get_class_variables, GitInfo, + tokenize_source, type_to_str, get_literals, TupleTypeEnforcer, @@ -145,7 +147,8 @@ def test_column_simple(self): class SimpleColumn: arg = 2 - self.assertEqual(get_class_column(SimpleColumn), 12) + tokens = tokenize_source(inspect.getsource(SimpleColumn)) + self.assertEqual(get_class_column(tokens), 12) def test_column_comment(self): class CommentColumn: @@ -158,28 +161,32 @@ class CommentColumn: arg = 2 - self.assertEqual(get_class_column(CommentColumn), 12) + tokens = tokenize_source(inspect.getsource(CommentColumn)) + self.assertEqual(get_class_column(tokens), 12) def test_column_space(self): class SpaceColumn: arg = 2 - self.assertEqual(get_class_column(SpaceColumn), 12) + tokens = tokenize_source(inspect.getsource(SpaceColumn)) + self.assertEqual(get_class_column(tokens), 12) def test_column_method(self): class FuncColumn: def func(self): pass - self.assertEqual(get_class_column(FuncColumn), 12) + tokens = tokenize_source(inspect.getsource(FuncColumn)) + self.assertEqual(get_class_column(tokens), 12) def test_dataclass(self): @class_decorator class DataclassColumn: arg: int = 5 - self.assertEqual(get_class_column(DataclassColumn), 12) + tokens = tokenize_source(inspect.getsource(DataclassColumn)) + self.assertEqual(get_class_column(tokens), 12) def test_dataclass_method(self): def wrapper(f): @@ -191,7 +198,8 @@ class DataclassColumn: def func(self): pass - self.assertEqual(get_class_column(DataclassColumn), 12) + tokens = tokenize_source(inspect.getsource(DataclassColumn)) + self.assertEqual(get_class_column(tokens), 12) class ClassVariableTests(TestCase): From e64c747377e09f77c21341ec823dbea87667feee Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Thu, 29 Aug 2024 19:58:05 +0200 Subject: [PATCH 6/7] Fix edge case where node.end_lineno is None --- src/tap/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tap/utils.py b/src/tap/utils.py index 793a3e0..d5e1d16 100644 --- a/src/tap/utils.py +++ b/src/tap/utils.py @@ -264,6 +264,11 @@ def get_subsequent_assign_lines(source_cls: str) -> Set[int]: assign_lines = set() for node in cls_body.body: if isinstance(node, (ast.Assign, ast.AnnAssign)): + # Check if the end line number is found + if node.end_lineno is None: + warnings.warn(parse_warning) + return set() + # Get line number of assign statement excluding the first line (and minus 1 for the if statement) assign_lines |= set(range(node.lineno, node.end_lineno)) From 09cb610410e6b4addc6929dcf41b2680d964feca Mon Sep 17 00:00:00 2001 From: arnaud-ma Date: Thu, 29 Aug 2024 20:56:09 +0200 Subject: [PATCH 7/7] replace return to continue in the loop --- src/tap/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tap/utils.py b/src/tap/utils.py index d5e1d16..c664517 100644 --- a/src/tap/utils.py +++ b/src/tap/utils.py @@ -267,7 +267,7 @@ def get_subsequent_assign_lines(source_cls: str) -> Set[int]: # Check if the end line number is found if node.end_lineno is None: warnings.warn(parse_warning) - return set() + continue # Get line number of assign statement excluding the first line (and minus 1 for the if statement) assign_lines |= set(range(node.lineno, node.end_lineno))