diff --git a/CHANGES/1159.feature.rst b/CHANGES/1159.feature.rst new file mode 100644 index 000000000..25d06f550 --- /dev/null +++ b/CHANGES/1159.feature.rst @@ -0,0 +1,3 @@ +Added :attr:`~yarl.URL.host_subcomponent` which returns the :rfc:`3986#section-3.2.2` host subcomponent -- by :user:`bdraco`. + +The only current practical difference between :attr:`~yarl.URL.raw_host` and :attr:`~yarl.URL.host_subcomponent` is that IPv6 addresses are returned bracketed. diff --git a/docs/api.rst b/docs/api.rst index 7dc250c64..170df7f33 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -191,7 +191,22 @@ There are two kinds of properties: *decoded* and *encoded* (with >>> URL('http://хост.домен').raw_host 'xn--n1agdj.xn--d1acufc' + >>> URL('http://[::1]').raw_host + '::1' + +.. attribute:: URL.host_subcomponent + + :rfc:`3986#section-3.2.2` host subcomponent part of URL, ``None`` for relative URLs + (:ref:`yarl-api-relative-urls`). + + .. doctest:: + + >>> URL('http://хост.домен').host_subcomponent + 'xn--n1agdj.xn--d1acufc' + >>> URL('http://[::1]').host_subcomponent + '[::1]' + .. versionadded:: 1.13 .. attribute:: URL.port diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 4714f0af5..6b90d22e4 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -42,6 +42,7 @@ runtimes sdist subclass subclasses +subcomponent svetlov uncompiled v1 diff --git a/tests/test_url.py b/tests/test_url.py index 1dbdd80a0..5115c8749 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -176,6 +176,24 @@ def test_raw_host(): assert url.raw_host == url._val.hostname +@pytest.mark.parametrize( + ("host"), + [ + ("example.com"), + ("[::1]"), + ("xn--gnter-4ya.com"), + ], +) +def test_host_subcomponent(host: str): + url = URL(f"http://{host}") + assert url.host_subcomponent == host + + +def test_host_subcomponent_return_idna_encoded_host(): + url = URL("http://оун-упа.укр") + assert url.host_subcomponent == "xn----8sb1bdhvc.xn--j1amh" + + def test_raw_host_non_ascii(): url = URL("http://оун-упа.укр") assert "xn----8sb1bdhvc.xn--j1amh" == url.raw_host diff --git a/yarl/_url.py b/yarl/_url.py index 26da688ac..a61a7f1d2 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -647,6 +647,8 @@ def raw_host(self) -> Union[str, None]: None for relative URLs. + When working with IPv6 addresses, use the `host_subcomponent` property instead + as it will return the host subcomponent with brackets. """ # Use host instead of hostname for sake of shortness # May add .hostname prop later @@ -660,16 +662,35 @@ def host(self) -> Union[str, None]: None for relative URLs. """ - raw = self.raw_host - if raw is None: + if (raw := self.raw_host) is None: return None - if "%" in raw: - # Hack for scoped IPv6 addresses like - # fe80::2%Перевірка - # presence of '%' sign means only IPv6 address, so idna is useless. + if raw and raw[-1].isdigit() or ":" in raw: + # IP addresses are never IDNA encoded return raw return _idna_decode(raw) + @cached_property + def host_subcomponent(self) -> Union[str, None]: + """Return the host subcomponent part of URL. + + None for relative URLs. + + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + + `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` + + Examples: + - `http://example.com:8080` -> `example.com` + - `http://example.com:80` -> `example.com` + - `https://127.0.0.1:8443` -> `127.0.0.1` + - `https://[::1]:8443` -> `[::1]` + - `http://[::1]` -> `[::1]` + + """ + if (raw := self.raw_host) is None: + return None + return f"[{raw}]" if ":" in raw else raw + @cached_property def port(self) -> Union[int, None]: """Port part of URL, with scheme-based fallback. @@ -953,7 +974,8 @@ def _encode_host(cls, host: str, human: bool = False) -> str: # - 127.0.0.1 (last character is a digit) # - 2001:db8::ff00:42:8329 (contains a colon) # - 2001:db8::ff00:42:8329%eth0 (contains a colon) - # - [2001:db8::ff00:42:8329] (contains a colon) + # - [2001:db8::ff00:42:8329] (contains a colon -- brackets should + # have been removed before it gets here) # Rare IP Address formats are not supported per: # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4 #