Skip to content

Commit

Permalink
Complete removal of SplitResult as an internal
Browse files Browse the repository at this point in the history
closes #172
  • Loading branch information
bdraco committed Oct 30, 2024
1 parent 24f814d commit 42bdf96
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 44 deletions.
1 change: 0 additions & 1 deletion tests/test_url_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,6 @@ def test_url_round_trips(
) -> None:
"""Verify that URLs round-trip correctly."""
parsed = URL(url)
assert parsed._val.hostname == hostname_without_brackets
assert parsed.raw_host == hostname_without_brackets
assert parsed.host_subcomponent == hostname
assert str(parsed) == url
Expand Down
97 changes: 54 additions & 43 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
)

_T = TypeVar("_T")
_SplitTuple = tuple[str, str, str, str, str]

if sys.version_info >= (3, 11):
from typing import Self
Expand Down Expand Up @@ -123,7 +124,7 @@ def rewrite_module(obj: _T) -> _T:


@lru_cache
def encode_url(url_str: str) -> tuple[SplitResult, _InternalURLCache]:
def encode_url(url_str: str) -> tuple[_SplitTuple, _InternalURLCache]:
"""Parse unencoded URL."""
cache: _InternalURLCache = {}
host: Union[str, None]
Expand Down Expand Up @@ -177,13 +178,13 @@ def encode_url(url_str: str) -> tuple[SplitResult, _InternalURLCache]:
# the lambda and arg processing since NamedTuples are constructed
# with a lambda that is built at run time
# https://github.com/python/cpython/blob/d83fcf8371f2f33c7797bc8f5423a8bca8c46e5c/Lib/collections/__init__.py#L441
return tuple.__new__(SplitResult, (scheme, netloc, path, query, fragment)), cache
return (scheme, netloc, path, query, fragment), cache


@lru_cache
def pre_encoded_url(url_str: str) -> tuple[SplitResult, _InternalURLCache]:
def pre_encoded_url(url_str: str) -> tuple[_SplitTuple, _InternalURLCache]:
"""Parse pre-encoded URL."""
return tuple.__new__(SplitResult, split_url(url_str)), {}
return split_url(url_str), {}


@rewrite_module
Expand Down Expand Up @@ -259,7 +260,7 @@ class URL:
# absolute-URI = scheme ":" hier-part [ "?" query ]
__slots__ = ("_cache", "_val")

_val: SplitResult
_val: _SplitTuple

def __new__(
cls,
Expand Down Expand Up @@ -387,22 +388,20 @@ def build(
# arg processing since NamedTuples are constructed with a lambda that is
# built at run time
# https://github.com/python/cpython/blob/d83fcf8371f2f33c7797bc8f5423a8bca8c46e5c/Lib/collections/__init__.py#L441
url._val = tuple.__new__(
SplitResult, (scheme, netloc, path, query_string, fragment)
)
url._val = (scheme, netloc, path, query_string, fragment)
url._cache = {}
return url

@classmethod
def _from_tup(cls, val: tuple[str, str, str, str, str]) -> "URL":
def _from_tup(cls, val: _SplitTuple) -> "URL":
"""Create a new URL from a tuple.
The tuple should be in the form of a SplitResult.
(scheme, netloc, path, query, fragment)
"""
self = object.__new__(cls)
self._val = tuple.__new__(SplitResult, val)
self._val = val
self._cache = {}
return self

Expand All @@ -412,7 +411,7 @@ def __init_subclass__(cls):
def __str__(self) -> str:
val = self._val
scheme, netloc, path, query, fragment = val
if not val.path and val.netloc and (val.query or val.fragment):
if not path and netloc and (query or fragment):
path = "/"
if (port := self.explicit_port) is not None and port == self._default_port:
# port normalization - using None for default ports to remove from rendering
Expand All @@ -431,15 +430,13 @@ def __eq__(self, other: object) -> bool:
if type(other) is not URL:
return NotImplemented

val1 = self._val
if not val1.path and val1.netloc:
scheme, netloc, _, query, fragment = val1
val1 = tuple.__new__(SplitResult, (scheme, netloc, "/", query, fragment))
scheme1, netloc1, path1, query1, fragment1 = self._val
if not path1 and netloc1:
val1 = (scheme1, netloc1, "/", query1, fragment1)

val2 = other._val
if not val2.path and val2.netloc:
scheme, netloc, _, query, fragment = val2
val2 = tuple.__new__(SplitResult, (scheme, netloc, "/", query, fragment))
scheme2, netloc2, path2, query2, fragment2 = other._val
if not path2 and netloc2:
val2 = (scheme2, netloc2, "/", query2, fragment2)

return val1 == val2

Expand All @@ -448,7 +445,7 @@ def __hash__(self) -> int:
val = self._val
scheme, netloc, path, query, fragment = val
if not path and netloc:
val = tuple.__new__(SplitResult, (scheme, netloc, "/", query, fragment))
val = (scheme, netloc, "/", query, fragment)
ret = self._cache["hash"] = hash(val)
return ret

Expand Down Expand Up @@ -481,24 +478,25 @@ def __mod__(self, query: Query) -> "URL":
return self.update_query(query)

def __bool__(self) -> bool:
val = self._val
return bool(val.netloc or val.path or val.query or val.fragment)
_, netloc, path, query, fragment = self._val
return bool(netloc or path or query or fragment)

def __getstate__(self) -> tuple[SplitResult]:
return (self._val,)
return (tuple.__new__(SplitResult, self._val),)

def __setstate__(self, state):
if state[0] is None and isinstance(state[1], dict):
# default style pickle
self._val = state[1]["_val"]
self._val = tuple.__new__(SplitResult, state[1]["_val"])
else:
self._val, *unused = state
self._cache = {}

def _cache_netloc(self) -> None:
"""Cache the netloc parts of the URL."""
c = self._cache
split_loc = split_netloc(self._val.netloc)
_, netloc, _, _, _ = self._val
split_loc = split_netloc(netloc)
c["raw_user"], c["raw_password"], c["raw_host"], c["explicit_port"] = split_loc

def is_absolute(self) -> bool:
Expand Down Expand Up @@ -526,7 +524,8 @@ def is_default_port(self) -> bool:
# If the explicit port is None, then the URL must be
# using the default port unless it's a relative URL
# which does not have an implicit port / default port
return self._val.netloc != ""
_, netloc, _, _, _ = self._val
return netloc != ""
return explicit == self._default_port

def origin(self) -> "URL":
Expand Down Expand Up @@ -579,7 +578,8 @@ def absolute(self) -> bool:
# Checking `netloc` is faster than checking `hostname`
# because `hostname` is a property that does some extra work
# to parse the host from the `netloc`
return self._val.netloc != ""
_, netloc, _, _, _ = self._val
return netloc != ""

@cached_property
def scheme(self) -> str:
Expand All @@ -588,7 +588,8 @@ def scheme(self) -> str:
Empty string for relative URLs or URLs starting with //
"""
return self._val.scheme
scheme, _, _, _, _ = self._val
return scheme

@cached_property
def raw_authority(self) -> str:
Expand All @@ -597,12 +598,14 @@ def raw_authority(self) -> str:
Empty string for relative URLs.
"""
return self._val.netloc
_, netloc, _, _, _ = self._val
return netloc

@cached_property
def _default_port(self) -> Union[int, None]:
"""Default port for the scheme or None if not known."""
return DEFAULT_PORTS.get(self._val.scheme)
scheme, _, _, _, _ = self._val
return DEFAULT_PORTS.get(scheme)

@cached_property
def authority(self) -> str:
Expand Down Expand Up @@ -771,7 +774,8 @@ def raw_path(self) -> str:
/ for absolute URLs without path part.
"""
return "/" if not (path := self._val.path) and self._val.netloc else path
_, netloc, path, _, _ = self._val
return "/" if not path and netloc else path

@cached_property
def path(self) -> str:
Expand All @@ -796,7 +800,8 @@ def path_safe(self) -> str:
@cached_property
def _parsed_query(self) -> list[tuple[str, str]]:
"""Parse query part of URL."""
return parse_qsl(self._val.query, keep_blank_values=True)
_, _, _, query, _ = self._val
return parse_qsl(query, keep_blank_values=True)

@cached_property
def query(self) -> "MultiDictProxy[str]":
Expand All @@ -815,7 +820,8 @@ def raw_query_string(self) -> str:
Empty string if query is missing.
"""
return self._val.query
_, _, _, query, _ = self._val
return query

@cached_property
def query_string(self) -> str:
Expand All @@ -824,7 +830,8 @@ def query_string(self) -> str:
Empty string if query is missing.
"""
return QS_UNQUOTER(self._val.query)
_, _, _, query, _ = self._val
return QS_UNQUOTER(query)

@cached_property
def path_qs(self) -> str:
Expand All @@ -834,7 +841,8 @@ def path_qs(self) -> str:
@cached_property
def raw_path_qs(self) -> str:
"""Encoded path of URL with query."""
return self.raw_path if not (q := self._val.query) else f"{self.raw_path}?{q}"
_, _, _, query, _ = self._val
return self.raw_path if not query else f"{self.raw_path}?{query}"

@cached_property
def raw_fragment(self) -> str:
Expand All @@ -843,7 +851,8 @@ def raw_fragment(self) -> str:
Empty string if fragment is missing.
"""
return self._val.fragment
_, _, _, _, fragment = self._val
return fragment

@cached_property
def fragment(self) -> str:
Expand All @@ -852,7 +861,8 @@ def fragment(self) -> str:
Empty string if fragment is missing.
"""
return UNQUOTER(self._val.fragment)
_, _, _, _, fragment = self._val
return UNQUOTER(fragment)

@cached_property
def raw_parts(self) -> tuple[str, ...]:
Expand All @@ -861,8 +871,8 @@ def raw_parts(self) -> tuple[str, ...]:
('/',) for absolute URLs if *path* is missing.
"""
path = self._val.path
if self._val.netloc:
_, netloc, path, _, _ = self._val
if netloc:
return ("/", *path[1:].split("/")) if path else ("/",)
if path and path[0] == "/":
return ("/", *path[1:].split("/"))
Expand Down Expand Up @@ -895,7 +905,8 @@ def parent(self) -> "URL":
def raw_name(self) -> str:
"""The last part of raw_parts."""
parts = self.raw_parts
if not self._val.netloc:
_, netloc, _, _, _ = self._val
if not netloc:
return parts[-1]
parts = parts[1:]
return parts[-1] if parts else ""
Expand Down Expand Up @@ -1223,9 +1234,9 @@ def with_fragment(self, fragment: Union[str, None]) -> "URL":
raise TypeError("Invalid fragment type")
else:
raw_fragment = FRAGMENT_QUOTER(fragment)
if self._val.fragment == raw_fragment:
scheme, netloc, path, query, fragment = self._val
if fragment == raw_fragment:
return self
scheme, netloc, path, query, _ = self._val
return self._from_tup((scheme, netloc, path, query, raw_fragment))

def with_name(self, name: str) -> "URL":
Expand Down Expand Up @@ -1349,7 +1360,7 @@ def human_repr(self) -> str:
if TYPE_CHECKING:
assert fragment is not None
netloc = make_netloc(user, password, host, self.explicit_port)
scheme = self._val.scheme
scheme, _, _, _, _ = self._val
return unsplit_result(scheme, netloc, path, query_string, fragment)


Expand Down

0 comments on commit 42bdf96

Please sign in to comment.