From 958974ab9c2891b83aeb3ba0a0e91439e8364248 Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Tue, 30 Mar 2021 18:32:58 +0200 Subject: [PATCH 1/2] tests: fix incorrect parameters --- tests/semver/test_version.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/semver/test_version.py b/tests/semver/test_version.py index ad909f33d..499fafe4a 100644 --- a/tests/semver/test_version.py +++ b/tests/semver/test_version.py @@ -34,10 +34,10 @@ def test_parse_valid(text, version): assert parsed.text == text -@pytest.mark.parametrize("input", [(None, "example")]) -def test_parse_invalid(input): +@pytest.mark.parametrize("value", [None, "example"]) +def test_parse_invalid(value): with pytest.raises(InvalidVersion): - Version.parse(input) + Version.parse(value) @pytest.mark.parametrize( From 6bc2a9ef6771432beff02207f9a633156b697dab Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Tue, 30 Mar 2021 18:36:39 +0200 Subject: [PATCH 2/2] pep440: replace lark in favour of regex The lark early implementation of PEP440 that was introduced was not performant. The implementation added on average around 7 seconds to the test suite execution time. This change drops the lark parser, and uses the regex provided by `packaging.version` for version string parsing. The new dataclass interface for version instances remains unaffected. 
--- poetry/core/version/grammars/__init__.py | 2 - poetry/core/version/grammars/pep440.lark | 32 ----- poetry/core/version/pep440/parser.py | 145 +++++++++++------------ 3 files changed, 69 insertions(+), 110 deletions(-) delete mode 100644 poetry/core/version/grammars/pep440.lark diff --git a/poetry/core/version/grammars/__init__.py b/poetry/core/version/grammars/__init__.py index f94bf1e3b..1fdf5738c 100644 --- a/poetry/core/version/grammars/__init__.py +++ b/poetry/core/version/grammars/__init__.py @@ -3,8 +3,6 @@ GRAMMAR_DIR = Path(__file__).parent -GRAMMAR_PEP_440 = GRAMMAR_DIR / "pep440.lark" - GRAMMAR_PEP_508_CONSTRAINTS = GRAMMAR_DIR / "pep508.lark" GRAMMAR_PEP_508_MARKERS = GRAMMAR_DIR / "markers.lark" diff --git a/poetry/core/version/grammars/pep440.lark b/poetry/core/version/grammars/pep440.lark deleted file mode 100644 index 62749f3b7..000000000 --- a/poetry/core/version/grammars/pep440.lark +++ /dev/null @@ -1,32 +0,0 @@ -// this is a modified version of the semver 2.0 specification grammar, specificially -// crafted for use with Python PEP 440 version specifiers. -start: version - -version: "v"? epoch? release pre_release? post_release? dev_release? ("+" local)? -release: epoch? NUMERIC_IDENTIFIER (("." NUMERIC_IDENTIFIER)+)? - -major: NUMERIC_IDENTIFIER -minor: NUMERIC_IDENTIFIER -patch: NUMERIC_IDENTIFIER - -epoch: INT "!" - -pre_release: _SEPERATOR? PRE_RELEASE_TAG _SEPERATOR? NUMERIC_IDENTIFIER? -PRE_RELEASE_TAG: "a" "lpha"? | "b" "eta"? | "c" | "rc" | "pre" "view"? - -post_release: "-" NUMERIC_IDENTIFIER | _SEPERATOR? POST_RELEASE_TAG _SEPERATOR? NUMERIC_IDENTIFIER? -POST_RELEASE_TAG: "post" | "r" "ev"? - -dev_release: _SEPERATOR? DEV_RELEASE_TAG _SEPERATOR? NUMERIC_IDENTIFIER? -DEV_RELEASE_TAG: "dev" - -local: LOCAL_IDENTIFIER ((_SEPERATOR LOCAL_IDENTIFIER)+)? -LOCAL_IDENTIFIER: (LETTER | INT)+ - -NUMERIC_IDENTIFIER: INT - -_SEPERATOR: "-" | "." 
| "_" - -%import common.LETTER -%import common.DIGIT -%import common.INT diff --git a/poetry/core/version/pep440/parser.py b/poetry/core/version/pep440/parser.py index ad963399f..5fb357094 100644 --- a/poetry/core/version/pep440/parser.py +++ b/poetry/core/version/pep440/parser.py @@ -1,14 +1,15 @@ +import re + from typing import TYPE_CHECKING -from typing import List +from typing import AnyStr +from typing import Match from typing import Optional from typing import Type -from lark import LarkError -from lark import Transformer +from packaging.version import VERSION_PATTERN from poetry.core.version.exceptions import InvalidVersion -from poetry.core.version.grammars import GRAMMAR_PEP_440 -from poetry.core.version.parser import Parser +from poetry.core.version.pep440 import LocalSegmentType from poetry.core.version.pep440 import Release from poetry.core.version.pep440 import ReleaseTag @@ -16,80 +17,72 @@ if TYPE_CHECKING: from poetry.core.version.pep440.version import PEP440Version -# Parser: PEP 440 -# we use earley because the grammar is ambiguous -PARSER_PEP_440 = Parser(GRAMMAR_PEP_440, "earley", False) - - -class _Transformer(Transformer): - def NUMERIC_IDENTIFIER(self, data: "Token"): # noqa - return int(data.value) - - def LOCAL_IDENTIFIER(self, data: "Token"): # noqa - try: - return int(data.value) - except ValueError: - return data.value - - def POST_RELEASE_TAG(self, data: "Token"): # noqa - return data.value - - def PRE_RELEASE_TAG(self, data: "Token"): # noqa - return data.value - - def DEV_RELEASE_TAG(self, data: "Token"): # noqa - return data.value - - def LOCAL(self, data: "Token"): # noqa - return data.value - def INT(self, data: "Token"): # noqa - return int(data.value) - - def version(self, children: List["Tree"]): # noqa - epoch, release, dev, pre, post, local = 0, None, None, None, None, None - - for child in children: - if child.data == "epoch": - # epoch is always a single numeric value - epoch = child.children[0] - elif child.data == 
"release": - # release segment is of the form N(.N)* - release = Release.from_parts(*child.children) - elif child.data == "pre_release": - # pre-release tag is of the form (a|b|rc)N - pre = ReleaseTag(*child.children) - elif child.data == "post_release": - # post-release tags are of the form N (shortened) or post(N)* - if len(child.children) == 1 and isinstance(child.children[0], int): - post = ReleaseTag("post", child.children[0]) - else: - post = ReleaseTag(*child.children) - elif child.data == "dev_release": - # dev-release tag is of the form dev(N)* - dev = ReleaseTag(*child.children) - elif child.data == "local": - local = tuple(child.children) - - return epoch, release, pre, post, dev, local - - def start(self, children: List["Tree"]): # noqa - return children[0] - - -_TRANSFORMER = _Transformer() +class PEP440Parser: + _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + _local_version_separators = re.compile(r"[._-]") + + @classmethod + def _get_release(cls, match: Optional[Match[AnyStr]]) -> Release: + if not match or match.group("release") is None: + return Release(0) + return Release.from_parts(*(int(i) for i in match.group("release").split("."))) + + @classmethod + def _get_prerelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]: + if not match or match.group("pre") is None: + return None + return ReleaseTag(match.group("pre_l"), int(match.group("pre_n") or 0)) + + @classmethod + def _get_postrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]: + if not match or match.group("post") is None: + return None + + return ReleaseTag( + match.group("post_l") or "post", + int(match.group("post_n1") or match.group("post_n2") or 0), + ) + + @classmethod + def _get_devrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]: + if not match or match.group("dev") is None: + return None + return ReleaseTag(match.group("dev_l"), int(match.group("dev_n") or 0)) + + @classmethod + def 
_get_local(cls, match: Optional[Match[AnyStr]]) -> Optional[LocalSegmentType]: + if not match or match.group("local") is None: + return None + + return tuple( + part.lower() if not part.isdigit() else int(part) + for part in cls._local_version_separators.split(match.group("local")) + ) + + @classmethod + def parse(cls, value: str, version_class: Optional[Type["PEP440Version"]] = None): + match = cls._regex.search(value) if value else None + if not match: + raise InvalidVersion(f"Invalid PEP 440 version: '{value}'") + + if version_class is None: + from poetry.core.version.pep440.version import PEP440Version + + version_class = PEP440Version + + return version_class( + epoch=int(match.group("epoch")) if match.group("epoch") else 0, + release=cls._get_release(match), + pre=cls._get_prerelease(match), + post=cls._get_postrelease(match), + dev=cls._get_devrelease(match), + local=cls._get_local(match), + text=value, + ) def parse_pep440( value: str, version_class: Optional[Type["PEP440Version"]] = None ) -> "PEP440Version": - if version_class is None: - from poetry.core.version.pep440.version import PEP440Version - - version_class = PEP440Version - - try: - tree = PARSER_PEP_440.parse(text=value) - return version_class(*_TRANSFORMER.transform(tree), text=value) - except (TypeError, LarkError): - raise InvalidVersion(f"Invalid PEP 440 version: '{value}'") + return PEP440Parser.parse(value, version_class)