-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7122 from bluetech/matcher-parser
Stop using Python's eval() for -m and -k
- Loading branch information
Showing
6 changed files
with
405 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``. | ||
The format supports ``or``, ``and``, ``not``, parentheses and general identifiers to match against. | ||
Python constants, keywords or other operators are no longer evaluated differently. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
r""" | ||
Evaluate match expressions, as used by `-k` and `-m`. | ||
The grammar is: | ||
expression: expr? EOF | ||
expr: and_expr ('or' and_expr)* | ||
and_expr: not_expr ('and' not_expr)* | ||
not_expr: 'not' not_expr | '(' expr ')' | ident | ||
ident: (\w|:|\+|-|\.|\[|\])+ | ||
The semantics are: | ||
- Empty expression evaluates to False. | ||
- ident evaluates to True or False according to a provided matcher function. | ||
- or/and/not evaluate according to the usual boolean semantics. | ||
""" | ||
import enum | ||
import re | ||
from typing import Callable | ||
from typing import Iterator | ||
from typing import Optional | ||
from typing import Sequence | ||
|
||
import attr | ||
|
||
from _pytest.compat import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from typing import NoReturn | ||
|
||
|
||
__all__ = [ | ||
"evaluate", | ||
"ParseError", | ||
] | ||
|
||
|
||
class TokenType(enum.Enum):
    """Kinds of tokens produced when lexing a match expression.

    Each member's value is a human-readable description of the token kind;
    it is the text interpolated into "expected ...; got ..." parse errors.
    """

    # Grouping.
    LPAREN = "left parenthesis"
    RPAREN = "right parenthesis"

    # Boolean operators (reserved words).
    OR = "or"
    AND = "and"
    NOT = "not"

    # Anything else that lexes as a name, handed to the matcher callback.
    IDENT = "identifier"

    # Sentinel emitted once the whole input has been consumed.
    EOF = "end of input"
|
||
|
||
@attr.s(frozen=True, slots=True)
class Token:
    """A single immutable lexical token of a match expression."""

    # The kind of token (operator, parenthesis, identifier or end of input).
    type = attr.ib(type=TokenType)
    # The exact source text of the token (empty string for EOF).
    value = attr.ib(type=str)
    # 0-based offset of the token's first character within the input.
    pos = attr.ib(type=int)
|
||
|
||
class ParseError(Exception):
    """Raised when a match expression is syntactically invalid.

    :param column: 1-based column in the input line at which the error occurred.
    :param message: Human-readable description of what went wrong.
    """

    def __init__(self, column: int, message: str) -> None:
        # Keep both pieces as attributes so callers can format their own report.
        self.message = message
        self.column = column

    def __str__(self) -> str:
        template = "at column {}: {}"
        return template.format(self.column, self.message)
|
||
|
||
class Scanner:
    """Lexer with a single token of lookahead for match expressions.

    ``tokens`` is the lazy token generator returned by :meth:`lex`;
    ``current`` is the next token that has not been consumed yet.
    """

    __slots__ = ("tokens", "current")

    # An identifier is a non-empty run of word characters and ``: + - . [ ]``.
    # The group is deliberately non-capturing (``(?:``), mirroring the grammar.
    _IDENT_PATTERN = re.compile(r"(?:\w|:|\+|-|\.|\[|\])+")

    # Identifier-shaped words that are really operators.
    _KEYWORDS = {
        "or": TokenType.OR,
        "and": TokenType.AND,
        "not": TokenType.NOT,
    }

    def __init__(self, input: str) -> None:
        """Start lexing ``input`` and load the first token.

        :raises ParseError: If the input starts with a character that cannot
            begin any token.
        """
        self.tokens = self.lex(input)
        self.current = next(self.tokens)

    def lex(self, input: str) -> Iterator[Token]:
        """Lazily yield the tokens of ``input``, ending with one EOF token.

        Spaces and tabs separate tokens and are otherwise ignored.

        :raises ParseError: On a character that cannot start any token; the
            reported column is 1-based.
        """
        pos = 0
        end = len(input)
        while pos < end:
            char = input[pos]
            if char in (" ", "\t"):
                pos += 1
            elif char == "(":
                yield Token(TokenType.LPAREN, "(", pos)
                pos += 1
            elif char == ")":
                yield Token(TokenType.RPAREN, ")", pos)
                pos += 1
            else:
                # Match in place at ``pos`` rather than slicing ``input[pos:]``,
                # which would copy the rest of the input for every identifier.
                match = self._IDENT_PATTERN.match(input, pos)
                if match is None:
                    raise ParseError(
                        pos + 1, 'unexpected character "{}"'.format(char),
                    )
                value = match.group(0)
                token_type = self._KEYWORDS.get(value, TokenType.IDENT)
                yield Token(token_type, value, pos)
                pos += len(value)
        yield Token(TokenType.EOF, "", pos)

    def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
        """Consume and return the current token if it has the given type.

        :param type: The token type to look for.
        :param reject: If true, raise instead of returning ``None`` when the
            current token has a different type.
        :returns: The consumed token, or ``None`` if the type did not match
            and ``reject`` is false.
        :raises ParseError: If the type did not match and ``reject`` is true.
        """
        if self.current.type is type:
            token = self.current
            # EOF is sticky: the generator is exhausted after yielding it, so
            # never advance past it.
            if token.type is not TokenType.EOF:
                self.current = next(self.tokens)
            return token
        if reject:
            self.reject((type,))
        return None

    def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
        """Raise a :class:`ParseError` at the current token.

        :param expected: The token types that would have been valid here;
            their descriptions are listed in the error message.
        """
        raise ParseError(
            self.current.pos + 1,
            "expected {}; got {}".format(
                " OR ".join(token_type.value for token_type in expected),
                self.current.type.value,
            ),
        )
|
||
|
||
def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate the top-level rule ``expression: expr? EOF``.

    An empty input (EOF right away) evaluates to False. Otherwise a full
    ``expr`` is evaluated and must be followed by the end of input.

    :raises ParseError: If tokens remain after the expression.
    """
    input_is_empty = s.accept(TokenType.EOF)
    if input_is_empty:
        return False
    result = expr(s, matcher)
    # Trailing tokens after a complete expression are a syntax error.
    s.accept(TokenType.EOF, reject=True)
    return result
|
||
|
||
def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``expr: and_expr ('or' and_expr)*``.

    Every operand is parsed (and therefore passed through ``matcher``) even
    when the result is already decided, so the scanner always consumes the
    whole disjunction.
    """
    result = and_expr(s, matcher)
    while s.accept(TokenType.OR):
        operand = and_expr(s, matcher)
        # Same as ``result or operand``: keep the first truthy value seen.
        if not result:
            result = operand
    return result
|
||
|
||
def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``and_expr: not_expr ('and' not_expr)*``.

    Every operand is parsed (and therefore passed through ``matcher``) even
    when the result is already decided, so the scanner always consumes the
    whole conjunction.
    """
    result = not_expr(s, matcher)
    while s.accept(TokenType.AND):
        operand = not_expr(s, matcher)
        # Same as ``result and operand``: keep the first falsy value seen.
        if result:
            result = operand
    return result
|
||
|
||
def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``not_expr: 'not' not_expr | '(' expr ')' | ident``.

    :raises ParseError: If the current token cannot start a ``not_expr``, or
        if a parenthesised expression is not closed.
    """
    # Negation: recurse so that chained ``not not x`` works.
    if s.accept(TokenType.NOT):
        negated = not_expr(s, matcher)
        return not negated

    # Parenthesised sub-expression; the closing parenthesis is mandatory.
    if s.accept(TokenType.LPAREN):
        inner = expr(s, matcher)
        s.accept(TokenType.RPAREN, reject=True)
        return inner

    # Plain identifier: delegate the decision to the caller's matcher.
    token = s.accept(TokenType.IDENT)
    if not token:
        s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))
    return matcher(token.value)
|
||
|
||
def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
    """Evaluate a match expression as used by -k and -m.

    :param input: The input expression - one line.
    :param matcher: Called with each identifier in the expression; should
        return whether that identifier matches. It should be prepared to
        handle arbitrary strings as input.
    :returns: Whether the entire expression matches or not.
    :raises ParseError: If the expression contains invalid syntax.
    """
    scanner = Scanner(input)
    return expression(scanner, matcher)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.