Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "RFC3986 compatible URL.join honoring empty segments (#1039)" #1067

Merged
merged 3 commits into from
Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES/1067.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Reverted RFC3986 compatible URL.join honoring empty segments which was introduced in :issue:`1039`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bdraco FYI, Sphinx has a dedicated role :rfc: which should be used to link RFCs. The rendered style would also be more consistent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did discover that, and I changed it before I published it in the release step. At least I hope I did as my burnout level was pretty high at that point


This change introduced a regression handling query string parameters with joined URLs. The change was reverted to maintain compatibility with the previous behavior.
63 changes: 0 additions & 63 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,69 +1664,6 @@ def test_join_from_rfc_3986_abnormal(url, expected):
assert base.join(url) == expected


EMPTY_SEGMENTS = [
(
"https://web.archive.org/web/",
"./https://github.com/aio-libs/yarl",
"https://web.archive.org/web/https://github.com/aio-libs/yarl",
),
(
"https://web.archive.org/web/https://github.com/",
"aio-libs/yarl",
"https://web.archive.org/web/https://github.com/aio-libs/yarl",
),
]


@pytest.mark.parametrize("base,url,expected", EMPTY_SEGMENTS)
def test_join_empty_segments(base, url, expected):
base = URL(base)
url = URL(url)
expected = URL(expected)
joined = base.join(url)
assert joined == expected


SIMPLE_BASE = "http://a/b/c/d"
URLLIB_URLJOIN = [
("", "http://a/b/c/g?y/./x", "http://a/b/c/g?y/./x"),
("", "http://a/./g", "http://a/./g"),
("svn://pathtorepo/dir1", "dir2", "svn://pathtorepo/dir2"),
("svn+ssh://pathtorepo/dir1", "dir2", "svn+ssh://pathtorepo/dir2"),
("ws://a/b", "g", "ws://a/g"),
("wss://a/b", "g", "wss://a/g"),
# test for issue22118 duplicate slashes
(SIMPLE_BASE + "/", "foo", SIMPLE_BASE + "/foo"),
# Non-RFC-defined tests, covering variations of base and trailing
# slashes
("http://a/b/c/d/e/", "../../f/g/", "http://a/b/c/f/g/"),
("http://a/b/c/d/e", "../../f/g/", "http://a/b/f/g/"),
("http://a/b/c/d/e/", "/../../f/g/", "http://a/f/g/"),
("http://a/b/c/d/e", "/../../f/g/", "http://a/f/g/"),
("http://a/b/c/d/e/", "../../f/g", "http://a/b/c/f/g"),
("http://a/b/", "../../f/g/", "http://a/f/g/"),
("a", "b", "b"),
]


@pytest.mark.parametrize("base,url,expected", URLLIB_URLJOIN)
def test_join_cpython_urljoin(base, url, expected):
# tests from cpython urljoin
base = URL(base)
url = URL(url)
expected = URL(expected)
joined = base.join(url)
assert joined == expected


def test_join_cpython_urljoin_fail():
with pytest.raises(
TypeError, match=r"unsupported operand type\(s\) for \+: 'NoneType' and 'str'"
):
URL("http:///").join(URL(".."))
pytest.xfail("Shouldn't raise TypeError on empty host name")


def test_split_result_non_decoded():
with pytest.raises(ValueError):
URL(SplitResult("http", "example.com", "path", "qs", "frag"))
Expand Down
68 changes: 10 additions & 58 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from contextlib import suppress
from ipaddress import ip_address
from typing import Union
from urllib.parse import SplitResult, parse_qsl, quote, urlsplit, urlunsplit
from urllib.parse import uses_netloc as uses_authority
from urllib.parse import uses_relative
from urllib.parse import SplitResult, parse_qsl, quote, urljoin, urlsplit, urlunsplit

import idna
from multidict import MultiDict, MultiDictProxy
Expand Down Expand Up @@ -261,14 +259,12 @@
netloc = authority
else:
tmp = SplitResult("", authority, "", "", "")
port = None if tmp.port == cls._default_port(scheme) else tmp.port
netloc = cls._make_netloc(
tmp.username, tmp.password, tmp.hostname, port, encode=True
tmp.username, tmp.password, tmp.hostname, tmp.port, encode=True

Check warning on line 263 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L263

Added line #L263 was not covered by tests
)
elif not user and not password and not host and not port:
netloc = ""
else:
port = None if port == cls._default_port(scheme) else port
netloc = cls._make_netloc(
user, password, host, port, encode=not encoded, encode_host=not encoded
)
Expand Down Expand Up @@ -459,15 +455,12 @@
def _get_default_port(self) -> Union[int, None]:
if not self.scheme:
return None
return self._default_port(self.scheme)

@staticmethod
def _default_port(scheme: str) -> Union[int, None]:
with suppress(KeyError):
return DEFAULT_PORTS[scheme]
return DEFAULT_PORTS[self.scheme]

with suppress(OSError):
return socket.getservbyname(scheme)
return socket.getservbyname(self.scheme)

return None

Expand Down Expand Up @@ -769,7 +762,11 @@
f"Appending path {path!r} starting from slash is forbidden"
)
path = path if encoded else self._PATH_QUOTER(path)
segments = list(reversed(path.split("/")))
segments = [
segment for segment in reversed(path.split("/")) if segment != "."

Check warning on line 766 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L766

Added line #L766 was not covered by tests
]
if not segments:
continue
# remove trailing empty segment for all but the last path
segment_slice_start = int(not last and segments[0] == "")
parsed += segments[segment_slice_start:]
Expand Down Expand Up @@ -1156,52 +1153,7 @@
# See docs for urllib.parse.urljoin
if not isinstance(url, URL):
raise TypeError("url should be URL")
other: URL = url
scheme = other.scheme or self.scheme
parts = {
k: getattr(self, k) or ""
for k in ("authority", "path", "query_string", "fragment")
}
parts["scheme"] = scheme

if scheme != self.scheme or scheme not in uses_relative:
return URL(str(other))

# scheme is in uses_authority as uses_authority is a superset of uses_relative
if scheme in uses_authority and other.authority:
parts.update(
{
k: getattr(other, k) or ""
for k in ("authority", "path", "query_string", "fragment")
}
)
return URL.build(**parts)

if other.path or other.fragment:
parts["fragment"] = other.fragment or ""
if other.path or other.query:
parts["query_string"] = other.query_string or ""

if not other.path:
return URL.build(**parts)

if other.path[0] == "/":
parts["path"] = other.path
return URL.build(**parts)

if self.path[-1] == "/":
# using an intermediate to avoid URL.joinpath dropping query & fragment
parts["path"] = URL(self.path).joinpath(other.path).path
else:
# …
# and relativizing ".."
path = URL("/".join([*self.parts[1:-1], ""])).joinpath(other.path).path
if parts["authority"]:
parts["path"] = "/" + path
else:
parts["path"] = path

return URL.build(**parts)
return URL(urljoin(str(self), str(url)), encoded=True)

def joinpath(self, *other, encoded=False):
"""Return a new URL with the elements in other appended to the path."""
Expand Down
Loading