Skip to content

Commit

Permalink
Stop using Python's eval() for -m and -k
Browse files Browse the repository at this point in the history
Previously, the expressions given to the `-m` and `-k` options were
evaluated with `eval`. This causes a few issues:

- Python keywords cannot be used.

- Constants like numbers, None, True, False are not handled correctly.

- Various syntax like numeric operators and `X if Y else Z` is supported
  unintentionally.

- `eval()` is somewhat dangerous for arbitrary input.

- Can fail in many ways so requires `except Exception`.

The format we want to support is quite simple, so change to a custom
parser. This fixes the issues above, and gives us full control of the
format, so can be documented comprehensively and even be extended in the
future if we wish.

Fixes #1141.
Fixes #3573.
Fixes #5881.
Fixes #6822.
Fixes #7112.
  • Loading branch information
bluetech committed Apr 25, 2020
1 parent cbca9f1 commit 49b2fab
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 75 deletions.
4 changes: 4 additions & 0 deletions changelog/TBD.breaking.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Expressions given to the ``-m`` and ``-k`` options are not longer evaluated using Python's ``eval()``.

The format supports ``or``, ``and``, ``not``, parenthesis and general identifiers to match against.
Python constants, keywords or operators are no longer evaluated differently.
18 changes: 3 additions & 15 deletions doc/en/example/markers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ Or select multiple nodes:
Using ``-k expr`` to select tests based on their name
-------------------------------------------------------

.. versionadded: 2.0/2.3.4
.. versionadded:: 2.0/2.3.4

You can use the ``-k`` command line option to specify an expression
which implements a substring match on the test names instead of the
exact match on markers that ``-m`` provides. This makes it easy to
select tests based on their names:

.. versionadded: 5.4
.. versionchanged:: 5.4

The expression matching is now case-insensitive.

Expand Down Expand Up @@ -198,20 +198,8 @@ Or to select "http" and "quick" tests:
===================== 2 passed, 2 deselected in 0.12s ======================
.. note::

If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y``
need to be simple non-keyword names. For example, ``"pass"`` or ``"from"``
will result in SyntaxErrors because ``"-k"`` evaluates the expression using
Python's `eval`_ function.

.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval

You can use ``and``, ``or``, ``not`` and parentheses.

However, if the ``"-k"`` argument is a simple string, no such restrictions
apply. Also ``"-k 'not STRING'"`` has no restrictions. You can also
specify numbers like ``"-k 1.3"`` to match tests which are parametrized
with the float ``"1.3"``.

Registering markers
-------------------------------------
Expand Down
207 changes: 167 additions & 40 deletions src/_pytest/mark/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
this is a place where we put datastructures used by legacy apis
we hope to remove
"""
import keyword
import enum
import re
from typing import Callable
from typing import Iterator
from typing import Optional
from typing import Sequence
from typing import Set

import attr
Expand All @@ -11,35 +16,174 @@
from _pytest.config import UsageError

if TYPE_CHECKING:
from typing import NoReturn

from _pytest.nodes import Item # noqa: F401 (used in type string)


# The grammar for match expressions is:
#
# expr: and_expr ('or' and_expr)*
# and_expr: not_expr ('and' not_expr)*
# not_expr: 'not' not_expr | '(' expr ')' | ident
# ident: (\w|\+|-|\.|\[|\])+


class TokenType(enum.Enum):
LPAREN = "left parenthesis"
RPAREN = "right parenthesis"
OR = "or"
AND = "and"
NOT = "not"
IDENT = "identifier"
EOF = "end of input"


@attr.s(frozen=True, slots=True)
class Token:
type = attr.ib(type=TokenType)
value = attr.ib(type=str)
column = attr.ib(type=int)


class ParseError(Exception):
def __init__(self, column: int, message: str) -> None:
self.column = column
self.message = message

def __str__(self) -> str:
return "at column {}: {}".format(self.column, self.message)


class Scanner:
__slots__ = ("tokens", "current")

def __init__(self, input: str) -> None:
self.tokens = self.lex(input)
self.current = next(self.tokens)

def lex(self, input: str) -> Iterator[Token]:
pos = 0
while pos < len(input):
if input[pos] in (" ", "\t"):
pos += 1
elif input[pos] == "(":
yield Token(TokenType.LPAREN, "(", pos + 1)
pos += 1
elif input[pos] == ")":
yield Token(TokenType.RPAREN, ")", pos + 1)
pos += 1
else:
match = re.match(r"(:?\w|\+|-|\.|\[|\])+", input[pos:])
if match:
value = match.group(0)
if value == "and":
yield Token(TokenType.AND, value, pos + 1)
elif value == "or":
yield Token(TokenType.OR, value, pos + 1)
elif value == "not":
yield Token(TokenType.NOT, value, pos + 1)
else:
yield Token(TokenType.IDENT, value, pos + 1)
pos += len(value)
else:
raise ParseError(
pos + 1, 'unexpected character "{}"'.format(input[pos]),
)
yield Token(TokenType.EOF, "", pos + 1)

def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
if self.current.type is type:
token = self.current
if token.type is not TokenType.EOF:
self.current = next(self.tokens)
return token
if reject:
self.reject((type,))
return None

def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
raise ParseError(
self.current.column,
"expected {}; got {}".format(
" OR ".join(type.value for type in expected), self.current.type.value,
),
)


def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = and_expr(s, matcher)
while s.accept(TokenType.OR):
rhs = and_expr(s, matcher)
ret = ret or rhs
return ret


def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = not_expr(s, matcher)
while s.accept(TokenType.AND):
rhs = not_expr(s, matcher)
ret = ret and rhs
return ret


def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
if s.accept(TokenType.NOT):
return not not_expr(s, matcher)
if s.accept(TokenType.LPAREN):
ret = expr(s, matcher)
s.accept(TokenType.RPAREN, reject=True)
return ret
ident = s.accept(TokenType.IDENT)
if ident:
return matcher(ident.value)
s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))


def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
"""Evaluate a match expression as used by -k and -m.
:param input: The input expression - one line.
:param matcher: Given an identifier, should return whether it matches or not.
Returns whether the entire expression matches or not.
"""
s = Scanner(input)
ret = expr(s, matcher)
s.accept(TokenType.EOF, reject=True)
return ret


# The actual matchers:


@attr.s
class MarkMapping:
"""Provides a local mapping for markers where item access
resolves to True if the marker is present. """
class MarkMatcher:
"""A matcher for markers which are present."""

own_mark_names = attr.ib()

@classmethod
def from_item(cls, item):
def from_item(cls, item) -> "MarkMatcher":
mark_names = {mark.name for mark in item.iter_markers()}
return cls(mark_names)

def __getitem__(self, name):
def __call__(self, name: str) -> bool:
return name in self.own_mark_names


@attr.s
class KeywordMapping:
"""Provides a local mapping for keywords.
Given a list of names, map any substring of one of these names to True.
class KeywordMatcher:
"""A matcher for keywords.
Given a list of names, matches any substring of one of these names. The
string inclusion check is case-insensitive.
"""

_names = attr.ib(type=Set[str])

@classmethod
def from_item(cls, item: "Item") -> "KeywordMapping":
def from_item(cls, item: "Item") -> "KeywordMatcher":
mapped_names = set()

# Add the names of the current item and any parent items
Expand All @@ -62,12 +206,7 @@ def from_item(cls, item: "Item") -> "KeywordMapping":

return cls(mapped_names)

def __getitem__(self, subname: str) -> bool:
"""Return whether subname is included within stored names.
The string inclusion check is case-insensitive.
"""
def __call__(self, subname: str) -> bool:
subname = subname.lower()
names = (name.lower() for name in self._names)

Expand All @@ -77,18 +216,17 @@ def __getitem__(self, subname: str) -> bool:
return False


python_keywords_allowed_list = ["or", "and", "not"]


def matchmark(colitem, markexpr):
def matchmark(colitem, markexpr: str) -> bool:
"""Tries to match on any marker names, attached to the given colitem."""
try:
return eval(markexpr, {}, MarkMapping.from_item(colitem))
except Exception:
raise UsageError("Wrong expression passed to '-m': {}".format(markexpr))
return evaluate(markexpr, MarkMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-m': {}: {}".format(markexpr, e)
) from None


def matchkeyword(colitem, keywordexpr):
def matchkeyword(colitem, keywordexpr: str) -> bool:
"""Tries to match given keyword expression to given collector item.
Will match on the name of colitem, including the names of its parents.
Expand All @@ -97,20 +235,9 @@ def matchkeyword(colitem, keywordexpr):
Additionally, matches on names in the 'extra_keyword_matches' set of
any item, as well as names directly assigned to test functions.
"""
mapping = KeywordMapping.from_item(colitem)
if " " not in keywordexpr:
# special case to allow for simple "-k pass" and "-k 1.3"
return mapping[keywordexpr]
elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]:
return not mapping[keywordexpr[4:]]
for kwd in keywordexpr.split():
if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list:
raise UsageError(
"Python keyword '{}' not accepted in expressions passed to '-k'".format(
kwd
)
)
try:
return eval(keywordexpr, {}, mapping)
except Exception:
raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr))
return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
) from None
Loading

0 comments on commit 49b2fab

Please sign in to comment.