diff --git a/changelog/7122.breaking.rst b/changelog/7122.breaking.rst new file mode 100644 index 0000000000..7fe329c9ff --- /dev/null +++ b/changelog/7122.breaking.rst @@ -0,0 +1,3 @@ +Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``. +The format supports ``or``, ``and``, ``not``, parenthesis and general identifiers to match against. +Python constants, keywords or other operators are no longer evaluated differently. diff --git a/doc/en/example/markers.rst b/doc/en/example/markers.rst index 467c2a2faf..e791f489d0 100644 --- a/doc/en/example/markers.rst +++ b/doc/en/example/markers.rst @@ -141,14 +141,14 @@ Or select multiple nodes: Using ``-k expr`` to select tests based on their name ------------------------------------------------------- -.. versionadded: 2.0/2.3.4 +.. versionadded:: 2.0/2.3.4 You can use the ``-k`` command line option to specify an expression which implements a substring match on the test names instead of the exact match on markers that ``-m`` provides. This makes it easy to select tests based on their names: -.. versionadded: 5.4 +.. versionchanged:: 5.4 The expression matching is now case-insensitive. @@ -198,20 +198,8 @@ Or to select "http" and "quick" tests: ===================== 2 passed, 2 deselected in 0.12s ====================== -.. note:: - - If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y`` - need to be simple non-keyword names. For example, ``"pass"`` or ``"from"`` - will result in SyntaxErrors because ``"-k"`` evaluates the expression using - Python's `eval`_ function. - -.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval - +You can use ``and``, ``or``, ``not`` and parentheses. - However, if the ``"-k"`` argument is a simple string, no such restrictions - apply. Also ``"-k 'not STRING'"`` has no restrictions. You can also - specify numbers like ``"-k 1.3"`` to match tests which are parametrized - with the float ``"1.3"``. Registering markers ------------------------------------- diff --git a/src/_pytest/mark/expression.py b/src/_pytest/mark/expression.py new file mode 100644 index 0000000000..008192d4af --- /dev/null +++ b/src/_pytest/mark/expression.py @@ -0,0 +1,173 @@ +r""" +Evaluate match expressions, as used by `-k` and `-m`. + +The grammar is: + +expression: expr? EOF +expr: and_expr ('or' and_expr)* +and_expr: not_expr ('and' not_expr)* +not_expr: 'not' not_expr | '(' expr ')' | ident +ident: (\w|:|\+|-|\.|\[|\])+ + +The semantics are: + +- Empty expression evaluates to False. +- ident evaluates to True of False according to a provided matcher function. +- or/and/not evaluate according to the usual boolean semantics. +""" +import enum +import re +from typing import Callable +from typing import Iterator +from typing import Optional +from typing import Sequence + +import attr + +from _pytest.compat import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import NoReturn + + +__all__ = [ + "evaluate", + "ParseError", +] + + +class TokenType(enum.Enum): + LPAREN = "left parenthesis" + RPAREN = "right parenthesis" + OR = "or" + AND = "and" + NOT = "not" + IDENT = "identifier" + EOF = "end of input" + + +@attr.s(frozen=True, slots=True) +class Token: + type = attr.ib(type=TokenType) + value = attr.ib(type=str) + pos = attr.ib(type=int) + + +class ParseError(Exception): + """The expression contains invalid syntax. + + :param column: The column in the line where the error occurred (1-based). + :param message: A description of the error. + """ + + def __init__(self, column: int, message: str) -> None: + self.column = column + self.message = message + + def __str__(self) -> str: + return "at column {}: {}".format(self.column, self.message) + + +class Scanner: + __slots__ = ("tokens", "current") + + def __init__(self, input: str) -> None: + self.tokens = self.lex(input) + self.current = next(self.tokens) + + def lex(self, input: str) -> Iterator[Token]: + pos = 0 + while pos < len(input): + if input[pos] in (" ", "\t"): + pos += 1 + elif input[pos] == "(": + yield Token(TokenType.LPAREN, "(", pos) + pos += 1 + elif input[pos] == ")": + yield Token(TokenType.RPAREN, ")", pos) + pos += 1 + else: + match = re.match(r"(:?\w|:|\+|-|\.|\[|\])+", input[pos:]) + if match: + value = match.group(0) + if value == "or": + yield Token(TokenType.OR, value, pos) + elif value == "and": + yield Token(TokenType.AND, value, pos) + elif value == "not": + yield Token(TokenType.NOT, value, pos) + else: + yield Token(TokenType.IDENT, value, pos) + pos += len(value) + else: + raise ParseError( + pos + 1, 'unexpected character "{}"'.format(input[pos]), + ) + yield Token(TokenType.EOF, "", pos) + + def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]: + if self.current.type is type: + token = self.current + if token.type is not TokenType.EOF: + self.current = next(self.tokens) + return token + if reject: + self.reject((type,)) + return None + + def reject(self, expected: Sequence[TokenType]) -> "NoReturn": + raise ParseError( + self.current.pos + 1, + "expected {}; got {}".format( + " OR ".join(type.value for type in expected), self.current.type.value, + ), + ) + + +def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool: + if s.accept(TokenType.EOF): + return False + ret = expr(s, matcher) + s.accept(TokenType.EOF, reject=True) + return ret + + +def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool: + ret = and_expr(s, matcher) + while s.accept(TokenType.OR): + rhs = and_expr(s, matcher) + ret = ret or rhs + return ret + + +def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool: + ret = not_expr(s, matcher) + while s.accept(TokenType.AND): + rhs = not_expr(s, matcher) + ret = ret and rhs + return ret + + +def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool: + if s.accept(TokenType.NOT): + return not not_expr(s, matcher) + if s.accept(TokenType.LPAREN): + ret = expr(s, matcher) + s.accept(TokenType.RPAREN, reject=True) + return ret + ident = s.accept(TokenType.IDENT) + if ident: + return matcher(ident.value) + s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT)) + + +def evaluate(input: str, matcher: Callable[[str], bool]) -> bool: + """Evaluate a match expression as used by -k and -m. + + :param input: The input expression - one line. + :param matcher: Given an identifier, should return whether it matches or not. + Should be prepared to handle arbitrary strings as input. + + Returns whether the entire expression matches or not. + """ + return expression(Scanner(input), matcher) diff --git a/src/_pytest/mark/legacy.py b/src/_pytest/mark/legacy.py index eb50340f24..a9461d4cef 100644 --- a/src/_pytest/mark/legacy.py +++ b/src/_pytest/mark/legacy.py @@ -2,44 +2,46 @@ this is a place where we put datastructures used by legacy apis we hope to remove """ -import keyword from typing import Set import attr from _pytest.compat import TYPE_CHECKING from _pytest.config import UsageError +from _pytest.mark.expression import evaluate +from _pytest.mark.expression import ParseError if TYPE_CHECKING: from _pytest.nodes import Item # noqa: F401 (used in type string) @attr.s -class MarkMapping: - """Provides a local mapping for markers where item access - resolves to True if the marker is present. """ +class MarkMatcher: + """A matcher for markers which are present.""" own_mark_names = attr.ib() @classmethod - def from_item(cls, item): + def from_item(cls, item) -> "MarkMatcher": mark_names = {mark.name for mark in item.iter_markers()} return cls(mark_names) - def __getitem__(self, name): + def __call__(self, name: str) -> bool: return name in self.own_mark_names @attr.s -class KeywordMapping: - """Provides a local mapping for keywords. - Given a list of names, map any substring of one of these names to True. +class KeywordMatcher: + """A matcher for keywords. + + Given a list of names, matches any substring of one of these names. The + string inclusion check is case-insensitive. """ _names = attr.ib(type=Set[str]) @classmethod - def from_item(cls, item: "Item") -> "KeywordMapping": + def from_item(cls, item: "Item") -> "KeywordMatcher": mapped_names = set() # Add the names of the current item and any parent items @@ -62,12 +64,7 @@ def from_item(cls, item: "Item") -> "KeywordMapping": return cls(mapped_names) - def __getitem__(self, subname: str) -> bool: - """Return whether subname is included within stored names. - - The string inclusion check is case-insensitive. - - """ + def __call__(self, subname: str) -> bool: subname = subname.lower() names = (name.lower() for name in self._names) @@ -77,18 +74,17 @@ def __getitem__(self, subname: str) -> bool: return False -python_keywords_allowed_list = ["or", "and", "not"] - - -def matchmark(colitem, markexpr): +def matchmark(colitem, markexpr: str) -> bool: """Tries to match on any marker names, attached to the given colitem.""" try: - return eval(markexpr, {}, MarkMapping.from_item(colitem)) - except Exception: - raise UsageError("Wrong expression passed to '-m': {}".format(markexpr)) + return evaluate(markexpr, MarkMatcher.from_item(colitem)) + except ParseError as e: + raise UsageError( + "Wrong expression passed to '-m': {}: {}".format(markexpr, e) + ) from None -def matchkeyword(colitem, keywordexpr): +def matchkeyword(colitem, keywordexpr: str) -> bool: """Tries to match given keyword expression to given collector item. Will match on the name of colitem, including the names of its parents. @@ -97,20 +93,9 @@ def matchkeyword(colitem, keywordexpr): Additionally, matches on names in the 'extra_keyword_matches' set of any item, as well as names directly assigned to test functions. """ - mapping = KeywordMapping.from_item(colitem) - if " " not in keywordexpr: - # special case to allow for simple "-k pass" and "-k 1.3" - return mapping[keywordexpr] - elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]: - return not mapping[keywordexpr[4:]] - for kwd in keywordexpr.split(): - if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list: - raise UsageError( - "Python keyword '{}' not accepted in expressions passed to '-k'".format( - kwd - ) - ) try: - return eval(keywordexpr, {}, mapping) - except Exception: - raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr)) + return evaluate(keywordexpr, KeywordMatcher.from_item(colitem)) + except ParseError as e: + raise UsageError( + "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e) + ) from None diff --git a/testing/test_mark.py b/testing/test_mark.py index 2aad2b1ba5..30a18b38e7 100644 --- a/testing/test_mark.py +++ b/testing/test_mark.py @@ -200,6 +200,8 @@ def test_hello(): "spec", [ ("xyz", ("test_one",)), + ("((( xyz)) )", ("test_one",)), + ("not not xyz", ("test_one",)), ("xyz and xyz2", ()), ("xyz2", ("test_two",)), ("xyz or xyz2", ("test_one", "test_two")), @@ -258,9 +260,11 @@ def test_nointer(): "spec", [ ("interface", ("test_interface",)), - ("not interface", ("test_nointer", "test_pass")), + ("not interface", ("test_nointer", "test_pass", "test_1", "test_2")), ("pass", ("test_pass",)), - ("not pass", ("test_interface", "test_nointer")), + ("not pass", ("test_interface", "test_nointer", "test_1", "test_2")), + ("not not not (pass)", ("test_interface", "test_nointer", "test_1", "test_2")), + ("1 or 2", ("test_1", "test_2")), ], ) def test_keyword_option_custom(spec, testdir): @@ -272,6 +276,10 @@ def test_nointer(): pass def test_pass(): pass + def test_1(): + pass + def test_2(): + pass """ ) opt, passed_result = spec @@ -293,7 +301,7 @@ def test_keyword_option_considers_mark(testdir): "spec", [ ("None", ("test_func[None]",)), - ("1.3", ("test_func[1.3]",)), + ("[1.3]", ("test_func[1.3]",)), ("2-3", ("test_func[2-3]",)), ], ) @@ -333,10 +341,23 @@ def test_func(arg): "spec", [ ( - "foo or import", - "ERROR: Python keyword 'import' not accepted in expressions passed to '-k'", + "foo or", + "at column 7: expected not OR left parenthesis OR identifier; got end of input", + ), + ( + "foo or or", + "at column 8: expected not OR left parenthesis OR identifier; got or", + ), + ("(foo", "at column 5: expected right parenthesis; got end of input",), + ("foo bar", "at column 5: expected end of input; got identifier",), + ( + "or or", + "at column 1: expected not OR left parenthesis OR identifier; got or", + ), + ( + "not or", + "at column 5: expected not OR left parenthesis OR identifier; got or", ), - ("foo or", "ERROR: Wrong expression passed to '-k': foo or"), ], ) def test_keyword_option_wrong_arguments(spec, testdir, capsys): @@ -798,10 +819,12 @@ def test_one(): passed, skipped, failed = reprec.countoutcomes() assert passed + skipped + failed == 0 - def test_no_magic_values(self, testdir): + @pytest.mark.parametrize( + "keyword", ["__", "+", ".."], + ) + def test_no_magic_values(self, testdir, keyword: str) -> None: """Make sure the tests do not match on magic values, - no double underscored values, like '__dict__', - and no instance values, like '()'. + no double underscored values, like '__dict__' and '+'. """ p = testdir.makepyfile( """ @@ -809,16 +832,12 @@ def test_one(): assert 1 """ ) - def assert_test_is_not_selected(keyword): - reprec = testdir.inline_run("-k", keyword, p) - passed, skipped, failed = reprec.countoutcomes() - dlist = reprec.getcalls("pytest_deselected") - assert passed + skipped + failed == 0 - deselected_tests = dlist[0].items - assert len(deselected_tests) == 1 - - assert_test_is_not_selected("__") - assert_test_is_not_selected("()") + reprec = testdir.inline_run("-k", keyword, p) + passed, skipped, failed = reprec.countoutcomes() + dlist = reprec.getcalls("pytest_deselected") + assert passed + skipped + failed == 0 + deselected_tests = dlist[0].items + assert len(deselected_tests) == 1 class TestMarkDecorator: @@ -1023,7 +1042,7 @@ def test_foo(): pass """ ) - expected = "ERROR: Wrong expression passed to '-m': {}".format(expr) + expected = "ERROR: Wrong expression passed to '-m': {}: *".format(expr) result = testdir.runpytest(foo, "-m", expr) result.stderr.fnmatch_lines([expected]) assert result.ret == ExitCode.USAGE_ERROR diff --git a/testing/test_mark_expression.py b/testing/test_mark_expression.py new file mode 100644 index 0000000000..74576786d1 --- /dev/null +++ b/testing/test_mark_expression.py @@ -0,0 +1,162 @@ +import pytest +from _pytest.mark.expression import evaluate +from _pytest.mark.expression import ParseError + + +def test_empty_is_false() -> None: + assert not evaluate("", lambda ident: False) + assert not evaluate("", lambda ident: True) + assert not evaluate(" ", lambda ident: False) + assert not evaluate("\t", lambda ident: False) + + +@pytest.mark.parametrize( + ("expr", "expected"), + ( + ("true", True), + ("true", True), + ("false", False), + ("not true", False), + ("not false", True), + ("not not true", True), + ("not not false", False), + ("true and true", True), + ("true and false", False), + ("false and true", False), + ("true and true and true", True), + ("true and true and false", False), + ("true and true and not true", False), + ("false or false", False), + ("false or true", True), + ("true or true", True), + ("true or true or false", True), + ("true and true or false", True), + ("not true or true", True), + ("(not true) or true", True), + ("not (true or true)", False), + ("true and true or false and false", True), + ("true and (true or false) and false", False), + ("true and (true or (not (not false))) and false", False), + ), +) +def test_basic(expr: str, expected: bool) -> None: + matcher = {"true": True, "false": False}.__getitem__ + assert evaluate(expr, matcher) is expected + + +@pytest.mark.parametrize( + ("expr", "expected"), + ( + (" true ", True), + (" ((((((true)))))) ", True), + (" ( ((\t (((true))))) \t \t)", True), + ("( true and (((false))))", False), + ("not not not not true", True), + ("not not not not not true", False), + ), +) +def test_syntax_oddeties(expr: str, expected: bool) -> None: + matcher = {"true": True, "false": False}.__getitem__ + assert evaluate(expr, matcher) is expected + + +@pytest.mark.parametrize( + ("expr", "column", "message"), + ( + ("(", 2, "expected not OR left parenthesis OR identifier; got end of input"), + (" (", 3, "expected not OR left parenthesis OR identifier; got end of input",), + ( + ")", + 1, + "expected not OR left parenthesis OR identifier; got right parenthesis", + ), + ( + ") ", + 1, + "expected not OR left parenthesis OR identifier; got right parenthesis", + ), + ("not", 4, "expected not OR left parenthesis OR identifier; got end of input",), + ( + "not not", + 8, + "expected not OR left parenthesis OR identifier; got end of input", + ), + ( + "(not)", + 5, + "expected not OR left parenthesis OR identifier; got right parenthesis", + ), + ("and", 1, "expected not OR left parenthesis OR identifier; got and"), + ( + "ident and", + 10, + "expected not OR left parenthesis OR identifier; got end of input", + ), + ("ident and or", 11, "expected not OR left parenthesis OR identifier; got or",), + ("ident ident", 7, "expected end of input; got identifier"), + ), +) +def test_syntax_errors(expr: str, column: int, message: str) -> None: + with pytest.raises(ParseError) as excinfo: + evaluate(expr, lambda ident: True) + assert excinfo.value.column == column + assert excinfo.value.message == message + + +@pytest.mark.parametrize( + "ident", + ( + ".", + "...", + ":::", + "a:::c", + "a+-b", + "אבגד", + "aaאבגדcc", + "a[bcd]", + "1234", + "1234abcd", + "1234and", + "notandor", + "not_and_or", + "not[and]or", + "1234+5678", + "123.232", + "True", + "False", + "if", + "else", + "while", + ), +) +def test_valid_idents(ident: str) -> None: + assert evaluate(ident, {ident: True}.__getitem__) + + +@pytest.mark.parametrize( + "ident", + ( + "/", + "\\", + "^", + "*", + "=", + "&", + "%", + "$", + "#", + "@", + "!", + "~", + "{", + "}", + '"', + "'", + "|", + ";", + "←", + ), +) +def test_invalid_idents(ident: str) -> None: + with pytest.raises(ParseError): + evaluate(ident, lambda ident: True)