Skip to content

Commit

Permalink
Merge pull request #6245 from nicolasbock/ipv6_uri
Browse files Browse the repository at this point in the history
Fix the URL quoting in _clean_link() for IPv6 addresses
  • Loading branch information
cjerdonek authored Apr 8, 2019
2 parents 78744e8 + 913757c commit 54b6a91
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 5 deletions.
1 change: 1 addition & 0 deletions news/6285.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix incorrect URL quoting of IPv6 addresses.
21 changes: 17 additions & 4 deletions src/pip/_internal/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,15 +939,28 @@ def _get_encoding_from_headers(headers):
return None


# Matches any single character other than a-z (compared case-insensitively),
# 0-9, or one of the punctuation characters listed in the class.
_CLEAN_LINK_RE = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)


def _clean_link(url):
# type: (str) -> str
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
the link, it will be rewritten to %20 (while not over-quoting
% or other characters)."""
return _CLEAN_LINK_RE.sub(lambda match: '%%%2x' % ord(match.group(0)), url)
# Split the URL into parts according to the general structure
# `scheme://netloc/path;parameters?query#fragment`. Note that the
# `netloc` can be empty and the URI will then refer to a local
# filesystem path.
result = urllib_parse.urlparse(url)
# In both cases below we unquote prior to quoting to make sure
# nothing is double quoted.
if result.netloc == "":
# On Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
path = urllib_request.pathname2url(
urllib_request.url2pathname(result.path))
else:
path = urllib_parse.quote(urllib_parse.unquote(result.path))
return urllib_parse.urlunparse(result._replace(path=path))


class HTMLPage(object):
Expand Down
64 changes: 63 additions & 1 deletion tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pip._internal.download import PipSession
from pip._internal.index import (
Link, PackageFinder, _determine_base_url, _egg_info_matches,
Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
_find_name_version_sep, _get_html_page,
)

Expand Down Expand Up @@ -280,3 +280,65 @@ def test_request_retries(caplog):
'Could not fetch URL http://localhost: Retry error - skipping'
in caplog.text
)


@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # Host name plus port: the `:` before the port stays unquoted.
        (
            "https://localhost.localdomain:8181/path/with space/",
            "https://localhost.localdomain:8181/path/with%20space/",
        ),
        # Already quoted input: the existing `%` escapes must not be
        # quoted a second time.
        (
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
        ),
        # IPv4 address plus port.
        (
            "https://127.0.0.1:8181/path/with space/",
            "https://127.0.0.1:8181/path/with%20space/",
        ),
        # IPv6 address plus port: the surrounding `[]` stay unquoted.
        (
            "https://[fd00:0:0:236::100]:8181/path/with space/",
            "https://[fd00:0:0:236::100]:8181/path/with%20space/",
        ),
        # Query string: the leading `?` stays unquoted.
        (
            "https://localhost.localdomain:8181/path/with/query?request=test",
            "https://localhost.localdomain:8181/path/with/query?request=test",
        ),
        # Colons inside the path portion are quoted.
        (
            "https://localhost.localdomain:8181/path:/with:/colon",
            "https://localhost.localdomain:8181/path%3A/with%3A/colon",
        ),
        # Looks like a drive letter but is not one (there is a host), so
        # the `:` is quoted.
        (
            "https://localhost.localdomain/T:/path/",
            "https://localhost.localdomain/T%3A/path/",
        ),
    ],
)
def test_clean_link(url, clean_url):
    """Cleaning `url` must yield exactly `clean_url`."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url


@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # Local file URL with a Windows drive letter: the `:` after the
        # drive letter stays unquoted, and the expected URL has no
        # trailing `/`.
        (
            "file:///T:/path/with spaces/",
            "file:///T:/path/with%20spaces",
        ),
    ],
)
@pytest.mark.skipif("sys.platform != 'win32'")
def test_clean_link_windows(url, clean_url):
    """Windows only: cleaning `url` must yield exactly `clean_url`."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url


@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # Same drive-letter style URL, but on a platform without drive
        # letters: the `:` is quoted like any other path character.
        (
            "file:///T:/path/with spaces/",
            "file:///T%3A/path/with%20spaces/",
        ),
    ],
)
@pytest.mark.skipif("sys.platform == 'win32'")
def test_clean_link_non_windows(url, clean_url):
    """Non-Windows only: cleaning `url` must yield exactly `clean_url`."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url

0 comments on commit 54b6a91

Please sign in to comment.