Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize URL string ports per RFC 3986 section 3.2.3 #1033

Merged
merged 19 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES/1033.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
The default protocol ports of well-known URI schemes are now taken into account
during the normalization of the URL string representation in accordance with
:rfc:`3986#section-3.2.3`.

Specified ports are removed from the :class:`str` representation of a :class:`~yarl.URL`
if the port matches the scheme's default port -- by :user:`commonism`.
7 changes: 7 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@
assert url.authority == "степан:пароль@слава.укр:8080"


def test_authority_unknown_scheme() -> None:
v = "scheme://user:[email protected]:43/path/to?a=1&b=2"
url = URL(v)
assert str(url) == v


def test_lowercase():
url = URL("http://gitHUB.com")
assert url.raw_host == "github.com"
Expand Down Expand Up @@ -1332,6 +1338,7 @@
def test_is_default_port_for_absolute_url_with_default_port():
url = URL("http://example.com:80")
assert url.is_default_port()
assert str(url) == "http://example.com"

Check warning on line 1341 in tests/test_url.py

View check run for this annotation

Codecov / codecov/patch

tests/test_url.py#L1341

Added line #L1341 was not covered by tests


def test_is_default_port_for_absolute_url_with_nondefault_port():
Expand Down
25 changes: 24 additions & 1 deletion tests/test_url_update_netloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,37 @@
assert str(url.with_port(8888)) == "http://example.com:8888"


def test_with_default_port_normalization() -> None:
url = URL("http://example.com")
assert str(url.with_scheme("https")) == "https://example.com"
assert str(url.with_scheme("https").with_port(443)) == "https://example.com"
assert str(url.with_port(443).with_scheme("https")) == "https://example.com"


def test_with_custom_port_normalization() -> None:
url = URL("http://example.com")
u88 = url.with_port(88)
assert str(u88) == "http://example.com:88"
assert str(u88.with_port(80)) == "http://example.com"
assert str(u88.with_scheme("https")) == "https://example.com:88"


def test_with_explicit_port_normalization() -> None:
url = URL("http://example.com")
u80 = url.with_port(80)
assert str(u80) == "http://example.com"
assert str(u80.with_port(81)) == "http://example.com:81"
assert str(u80.with_scheme("https")) == "https://example.com:80"


def test_with_port_with_no_port():
url = URL("http://example.com")
assert str(url.with_port(None)) == "http://example.com"


def test_with_port_ipv6():
url = URL("http://[::1]:8080/")
assert str(url.with_port(80)) == "http://[::1]:80/"
assert str(url.with_port(81)) == "http://[::1]:81/"

Check warning on line 224 in tests/test_url_update_netloc.py

View check run for this annotation

Codecov / codecov/patch

tests/test_url_update_netloc.py#L224

Added line #L224 was not covered by tests


def test_with_port_keeps_query_and_fragment():
Expand Down
43 changes: 39 additions & 4 deletions yarl/_url.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import functools
import math
import socket
import warnings
from collections.abc import Mapping, Sequence
from contextlib import suppress
from ipaddress import ip_address
from typing import Union
from urllib.parse import SplitResult, parse_qsl, quote, urljoin, urlsplit, urlunsplit

import idna
Expand Down Expand Up @@ -291,6 +293,18 @@
val = self._val
if not val.path and self.is_absolute() and (val.query or val.fragment):
val = val._replace(path="/")
if (port := self._get_port()) is None:
commonism marked this conversation as resolved.
Show resolved Hide resolved
# port normalization - using None for default ports to remove from rendering
# https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3
commonism marked this conversation as resolved.
Show resolved Hide resolved
val = val._replace(
netloc=self._make_netloc(
self.raw_user,
self.raw_password,
self.raw_host,

Check warning on line 303 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L300-L303

Added lines #L300 - L303 were not covered by tests
port,
encode_host=False,

Check warning on line 305 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L305

Added line #L305 was not covered by tests
)
)
return urlunsplit(val)

def __repr__(self):
Expand Down Expand Up @@ -382,10 +396,13 @@
e.g. 'http://python.org' or 'http://python.org:80', False
otherwise.

Return False for relative URLs.

"""
if self.port is None:
return False
default = DEFAULT_PORTS.get(self.scheme)
if self.explicit_port is None:
# A relative URL does not have an implicit port / default port
return self.port is not None
default = self._get_default_port()
if default is None:
return False
return self.port == default
Expand Down Expand Up @@ -435,6 +452,24 @@
"""
return self._val.netloc

def _get_default_port(self) -> Union[int, None]:
if not self.scheme:
return None

with suppress(KeyError):
return DEFAULT_PORTS[self.scheme]

with suppress(OSError):
return socket.getservbyname(self.scheme)

return None

def _get_port(self) -> Union[int, None]:
"""Port or None if default port"""
if self._get_default_port() == self.port:
return None
return self.port

@cached_property
def authority(self):
"""Decoded authority part of URL.
Expand Down Expand Up @@ -522,7 +557,7 @@
scheme without default port substitution.

"""
return self._val.port or DEFAULT_PORTS.get(self._val.scheme)
return self._val.port or self._get_default_port()

@property
def explicit_port(self):
Expand Down
Loading