Skip to content

Commit

Permalink
Avoid nested quantifiers with overlapping character space on git url …
Browse files Browse the repository at this point in the history
…parsing (#1902) (#1913)

* fix (git): match `\w` instead of `.` when capturing the user

* change (vcs.git): hold the patterns for the regex parts in a dictionary so they are consistent across all regexes

* new (vcs.git): test for `parse_url` and some fixes for the regex pattern

* new (vcs.git): test for `parse_url` with string that should fail

* fix (test.vcs.git): make flake8 happy
  • Loading branch information
finswimmer authored and sdispater committed Jan 22, 2020
1 parent 954d160 commit 2df0d2c
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 32 deletions.
102 changes: 70 additions & 32 deletions poetry/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,84 @@
from poetry.utils._compat import decode


# Regex fragments shared by every entry in PATTERNS, keyed by the URL part
# they capture. Keeping them in one place guarantees that all patterns agree
# on what a user, host, path, ... may contain.
#
# "user" is an explicit character class that cannot contain "@", and each
# pattern makes it optional with "?" (at most once). Input such as a long run
# of "@" therefore fails immediately instead of triggering heavy backtracking.
pattern_formats = {
    "protocol": r"\w+",
    "user": r"[a-zA-Z0-9_.-]+",
    "resource": r"[a-zA-Z0-9_.-]+",
    "port": r"\d+",
    "path": r"[\w\-/\\]+",
    "name": r"[\w\-]+",
    # A revision runs to the end of the URL and may not contain the
    # "@" / "#" characters that introduce it.
    "rev": r"[^@#]+",
}

# Candidate patterns for a git URL. Every pattern exposes (a subset of) the
# named groups protocol, user, resource, port, pathname, name and rev.
#
# The templates go through str.format(), so "{key}" is replaced by the matching
# pattern_formats entry and literal regex braces must be doubled ("{{1,2}}").
PATTERNS = [
    # Anchored URL with one of the well-known schemes and an optional "git+"
    # prefix. The host is optional (e.g. "file:///foo/bar.git") and the path
    # may be introduced and separated by "/", ":" or a backslash. The name is
    # matched lazily ("{name}?" expands to "[\w\-]+?").
    re.compile(
        r"^(git\+)?"
        r"(?P<protocol>https?|git|ssh|rsync|file)://"
        r"(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource})?"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>[:/\\]({path}[/\\])?"
        r"((?P<name>{name}?)(\.git|[/\\])?)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # URL with an arbitrary "\w+" scheme. The host is mandatory here and the
    # captured resource may carry a trailing ":".
    re.compile(
        r"(git\+)?"
        r"((?P<protocol>{protocol})://)"
        r"(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource}:?)"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>({path})"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # Anchored scheme-less form such as "user@host:org/repo". The path needs
    # at least one directory component ending in "/" before the name.
    re.compile(
        r"^(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource})"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>([:/]{path}/)"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # Scheme-less form where host and path are separated by one or two of
    # ":" / "/". There is no port group in this pattern.
    re.compile(
        r"((?P<user>{user})@)?"
        r"(?P<resource>{resource})"
        r"[:/]{{1,2}}"
        r"(?P<pathname>({path})"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
]

Expand Down
169 changes: 169 additions & 0 deletions tests/vcs/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from poetry.vcs.git import Git
from poetry.vcs.git import GitUrl
from poetry.vcs.git import ParsedUrl


@pytest.mark.parametrize(
Expand Down Expand Up @@ -74,3 +75,171 @@
)
def test_normalize_url(url, normalized):
assert normalized == Git.normalize_url(url)


# ParsedUrl positional arguments as used below:
# (protocol, resource, pathname, user, port, name, rev)
@pytest.mark.parametrize(
    "url, parsed",
    [
        (
            "git+ssh://user@hostname:project.git#commit",
            ParsedUrl(
                "ssh", "hostname", ":project.git", "user", None, "project", "commit"
            ),
        ),
        (
            "git+http://user@hostname/project/blah.git@commit",
            ParsedUrl(
                "http", "hostname", "/project/blah.git", "user", None, "blah", "commit"
            ),
        ),
        (
            "git+https://user@hostname/project/blah.git",
            ParsedUrl(
                "https", "hostname", "/project/blah.git", "user", None, "blah", None
            ),
        ),
        (
            "git+https://user@hostname:project/blah.git",
            ParsedUrl(
                "https", "hostname", ":project/blah.git", "user", None, "blah", None
            ),
        ),
        (
            "git+ssh://git@github.com:sdispater/poetry.git#v1.0.27",
            ParsedUrl(
                "ssh",
                "github.com",
                ":sdispater/poetry.git",
                "git",
                None,
                "poetry",
                "v1.0.27",
            ),
        ),
        (
            "git+ssh://git@github.com:/sdispater/poetry.git",
            ParsedUrl(
                "ssh",
                "github.com",
                ":/sdispater/poetry.git",
                "git",
                None,
                "poetry",
                None,
            ),
        ),
        (
            "git+ssh://git@github.com:org/repo",
            ParsedUrl("ssh", "github.com", ":org/repo", "git", None, "repo", None),
        ),
        (
            "git+ssh://git@github.com/org/repo",
            ParsedUrl("ssh", "github.com", "/org/repo", "git", None, "repo", None),
        ),
        (
            "git+ssh://foo:22/some/path",
            ParsedUrl("ssh", "foo", "/some/path", None, "22", "path", None),
        ),
        (
            "git@github.com:org/repo",
            ParsedUrl(None, "github.com", ":org/repo", "git", None, "repo", None),
        ),
        (
            "git+https://github.com/sdispater/pendulum",
            ParsedUrl(
                "https",
                "github.com",
                "/sdispater/pendulum",
                None,
                None,
                "pendulum",
                None,
            ),
        ),
        (
            "git+https://github.com/sdispater/pendulum#7a018f2d075b03a73409e8356f9b29c9ad4ea2c5",
            ParsedUrl(
                "https",
                "github.com",
                "/sdispater/pendulum",
                None,
                None,
                "pendulum",
                "7a018f2d075b03a73409e8356f9b29c9ad4ea2c5",
            ),
        ),
        (
            "git+ssh://git@git.example.com:b/b.git#v1.0.0",
            ParsedUrl("ssh", "git.example.com", ":b/b.git", "git", None, "b", "v1.0.0"),
        ),
        (
            "git+ssh://git@github.com:sdispater/pendulum.git#foo/bar",
            ParsedUrl(
                "ssh",
                "github.com",
                ":sdispater/pendulum.git",
                "git",
                None,
                "pendulum",
                "foo/bar",
            ),
        ),
        (
            "git+file:///foo/bar.git",
            ParsedUrl("file", None, "/foo/bar.git", None, None, "bar", None),
        ),
        (
            "git+file://C:\\Users\\hello\\testing.git#zkat/windows-files",
            ParsedUrl(
                "file",
                "C",
                ":\\Users\\hello\\testing.git",
                None,
                None,
                "testing",
                "zkat/windows-files",
            ),
        ),
        (
            "git+https://git.example.com/sdispater/project/my_repo.git",
            ParsedUrl(
                "https",
                "git.example.com",
                "/sdispater/project/my_repo.git",
                None,
                None,
                "my_repo",
                None,
            ),
        ),
        (
            "git+ssh://git@git.example.com:sdispater/project/my_repo.git",
            ParsedUrl(
                "ssh",
                "git.example.com",
                ":sdispater/project/my_repo.git",
                "git",
                None,
                "my_repo",
                None,
            ),
        ),
    ],
)
def test_parse_url(url, parsed):
    """Check that ``ParsedUrl.parse(url)`` yields the expected parts.

    The result is compared with ``parsed`` attribute by attribute, so a
    failure names the exact URL part that was parsed incorrectly.
    """
    result = ParsedUrl.parse(url)
    assert parsed.name == result.name
    assert parsed.pathname == result.pathname
    assert parsed.port == result.port
    assert parsed.protocol == result.protocol
    assert parsed.resource == result.resource
    assert parsed.rev == result.rev
    assert parsed.url == result.url
    assert parsed.user == result.user


def test_parse_url_should_fail():
    """``ParsedUrl.parse`` must reject a malformed URL with ``ValueError``.

    The input is ``https://`` followed by 64 ``@`` characters and a ``!``.
    NOTE(review): presumably chosen to provoke excessive regex backtracking
    on the user part -- confirm against the URL patterns in poetry.vcs.git.
    """
    at_signs = "@" * 64
    malformed_url = "".join(("https://", at_signs, "!"))

    with pytest.raises(ValueError):
        ParsedUrl.parse(malformed_url)

0 comments on commit 2df0d2c

Please sign in to comment.