Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete removal of SplitResult as an internal #1396

Merged
merged 10 commits into from
Oct 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/1396.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance of many :class:`~yarl.URL` methods -- by :user:`bdraco`.
5 changes: 3 additions & 2 deletions tests/test_pickle.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@ def test_pickle():
def test_default_style_state():
u = URL("test")
hash(u)
u.__setstate__((None, {"_val": "test", "_strict": False, "_cache": {"hash": 1}}))
val = ("test", "test", "test", "test", "test")
u.__setstate__((None, {"_val": val, "_strict": False, "_cache": {"hash": 1}}))
assert not u._cache
assert u._val == "test"
assert u._val == val
50 changes: 25 additions & 25 deletions tests/test_url.py
Original file line number Diff line number Diff line change
@@ -132,13 +132,13 @@ def test_scheme():
def test_raw_user():
url = URL("http://user@example.com")
assert "user" == url.raw_user
assert url.raw_user == url._val.username
assert url.raw_user == SplitResult(*url._val).username


def test_raw_user_non_ascii():
url = URL("http://бажан@example.com")
assert "%D0%B1%D0%B0%D0%B6%D0%B0%D0%BD" == url.raw_user
assert url.raw_user == url._val.username
assert url.raw_user == SplitResult(*url._val).username


def test_no_user():
@@ -154,13 +154,13 @@ def test_user_non_ascii():
def test_raw_password():
url = URL("http://user:password@example.com")
assert "password" == url.raw_password
assert url.raw_password == url._val.password
assert url.raw_password == SplitResult(*url._val).password


def test_raw_password_non_ascii():
url = URL("http://user:пароль@example.com")
assert "%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" == url.raw_password
assert url.raw_password == url._val.password
assert url.raw_password == SplitResult(*url._val).password


def test_password_non_ascii():
@@ -179,7 +179,7 @@ def test_empty_password_without_user():
assert url.user is None
assert url.password == ""
assert url.raw_password == ""
assert url.raw_password == url._val.password
assert url.raw_password == SplitResult(*url._val).password


def test_user_empty_password():
@@ -191,7 +191,7 @@ def test_user_empty_password():
def test_raw_host():
url = URL("http://example.com")
assert "example.com" == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


@pytest.mark.parametrize(
@@ -244,7 +244,7 @@ def test_invalid_idna_a_label_encoding():
def test_raw_host_non_ascii():
url = URL("http://оун-упа.укр")
assert "xn----8sb1bdhvc.xn--j1amh" == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_host_non_ascii():
@@ -265,19 +265,19 @@ def test_host_with_underscore():
def test_raw_host_when_port_is_specified():
url = URL("http://example.com:8888")
assert "example.com" == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_raw_host_from_str_with_ipv4():
url = URL("http://127.0.0.1:80")
assert url.raw_host == "127.0.0.1"
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_raw_host_from_str_with_ipv6():
url = URL("http://[::1]:80")
assert url.raw_host == "::1"
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_authority_full() -> None:
@@ -311,21 +311,21 @@ def test_lowercase():
url = URL("http://gitHUB.com")
assert url.raw_host == "github.com"
assert url.host == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_lowercase_nonascii():
url = URL("http://Слава.Укр")
assert url.raw_host == "xn--80aaf8a3a.xn--j1amh"
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname
assert url.host == "слава.укр"


def test_compressed_ipv6():
url = URL("http://[1DEC:0:0:0::1]")
assert url.raw_host == "1dec::1"
assert url.host == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_ipv6_missing_left_bracket():
@@ -353,19 +353,19 @@ def test_ipv4_zone():
url = URL("http://1.2.3.4%тест%42:123")
assert url.raw_host == "1.2.3.4%тест%42"
assert url.host == url.raw_host
assert url.raw_host == url._val.hostname
assert url.raw_host == SplitResult(*url._val).hostname


def test_port_for_explicit_port():
url = URL("http://example.com:8888")
assert 8888 == url.port
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_port_for_implicit_port():
url = URL("http://example.com")
assert 80 == url.port
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_port_for_relative_url():
@@ -383,25 +383,25 @@ def test_port_for_unknown_scheme():
def test_explicit_port_for_explicit_port():
url = URL("http://example.com:8888")
assert 8888 == url.explicit_port
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_explicit_port_for_implicit_port():
url = URL("http://example.com")
assert url.explicit_port is None
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_explicit_port_for_relative_url():
url = URL("/path/to")
assert url.explicit_port is None
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_explicit_port_for_unknown_scheme():
url = URL("unknown://example.com")
assert url.explicit_port is None
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port


def test_raw_path_string_empty():
@@ -1563,7 +1563,7 @@ def test_is_default_port_for_unknown_scheme():
def test_handling_port_zero():
url = URL("http://example.com:0")
assert url.explicit_port == 0
assert url.explicit_port == url._val.port
assert url.explicit_port == SplitResult(*url._val).port
assert str(url) == "http://example.com:0"
assert not url.is_default_port()

@@ -1767,13 +1767,13 @@ def test_parent_for_empty_url():
def test_parent_for_relative_url_with_child():
url = URL("path/to")
assert url.parent == URL("path")
assert url.parent._val.path == "path"
assert SplitResult(*url.parent._val).path == "path"


def test_parent_for_relative_url():
url = URL("path")
assert url.parent == URL("")
assert url.parent._val.path == ""
assert SplitResult(*url.parent._val).path == ""


def test_parent_for_no_netloc_url():
@@ -1784,7 +1784,7 @@ def test_parent_for_no_netloc_url():
def test_parent_for_top_level_no_netloc_url():
url = URL("/")
assert url.parent == URL("/")
assert url.parent._val.path == "/"
assert SplitResult(*url.parent._val).path == "/"


def test_parent_for_absolute_url():
@@ -1795,7 +1795,7 @@ def test_parent_for_absolute_url():
def test_parent_for_top_level_absolute_url():
url = URL("http://go.to/")
assert url.parent == URL("http://go.to/")
assert url.parent._val.path == "/"
assert SplitResult(*url.parent._val).path == "/"


def test_empty_value_for_query():
4 changes: 3 additions & 1 deletion tests/test_url_parsing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from urllib.parse import SplitResult

import pytest

from yarl import URL
@@ -616,7 +618,7 @@ def test_url_round_trips(
) -> None:
"""Verify that URLs round-trip correctly."""
parsed = URL(url)
assert parsed._val.hostname == hostname_without_brackets
assert SplitResult(*parsed._val).hostname == hostname_without_brackets
assert parsed.raw_host == hostname_without_brackets
assert parsed.host_subcomponent == hostname
assert str(parsed) == url
11 changes: 9 additions & 2 deletions yarl/_parse.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import re
import unicodedata
from functools import lru_cache
from typing import Union
from typing import Final, Union
from urllib.parse import scheme_chars, uses_netloc

from ._quoters import QUOTER
@@ -19,8 +19,15 @@
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
USES_AUTHORITY = frozenset(uses_netloc)

SplitURL = tuple[str, str, str, str, str]
SCHEME: Final[int] = 0
NETLOC: Final[int] = 1
PATH: Final[int] = 2
QUERY: Final[int] = 3
FRAGMENT: Final[int] = 4

def split_url(url: str) -> tuple[str, str, str, str, str]:

def split_url(url: str) -> SplitURL:
"""Split URL into parts."""
# Adapted from urllib.parse.urlsplit
# Only lstrip url as some applications rely on preserving trailing space.
Loading