From e3a282e5df921b7564b88e88dc8e354e96dccd79 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 29 Nov 2024 10:23:40 -0800 Subject: [PATCH] Make `from_parts` an LRU to increase the chance we can preserve the internal cache (#1434) --- CHANGES/1434.misc.rst | 1 + yarl/_url.py | 103 ++++++++++++++++-------------------------- 2 files changed, 39 insertions(+), 65 deletions(-) create mode 100644 CHANGES/1434.misc.rst diff --git a/CHANGES/1434.misc.rst b/CHANGES/1434.misc.rst new file mode 100644 index 00000000..89a6993c --- /dev/null +++ b/CHANGES/1434.misc.rst @@ -0,0 +1 @@ +Improved cache performance for operations that produce a new :class:`~yarl.URL` object -- by :user:`bdraco`. diff --git a/yarl/_url.py b/yarl/_url.py index 4bf3c2dd..0bf68597 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -213,6 +213,19 @@ def pre_encoded_url(url_str: str) -> "URL": return self +@lru_cache +def from_parts(scheme: str, netloc: str, path: str, query: str, fragment: str) -> "URL": + """Create a new URL from parts.""" + self = object.__new__(URL) + self._scheme = scheme + self._netloc = netloc + self._path = path + self._query = query + self._fragment = fragment + self._cache = {} + return self + + @rewrite_module class URL: # Don't derive from str @@ -308,10 +321,7 @@ def __new__( if type(val) is SplitResult: if not encoded: raise ValueError("Cannot apply decoding to SplitResult") - self = object.__new__(URL) - self._scheme, self._netloc, self._path, self._query, self._fragment = val - self._cache = {} - return self + return from_parts(*val) if isinstance(val, str): return pre_encoded_url(str(val)) if encoded else encode_url(str(val)) if val is UNDEFINED: @@ -421,20 +431,6 @@ def build( url._cache = {} return url - @classmethod - def _from_parts( - cls, scheme: str, netloc: str, path: str, query: str, fragment: str - ) -> "URL": - """Create a new URL from parts.""" - self = object.__new__(cls) - self._scheme = scheme - self._netloc = netloc - self._path = path - self._query 
= query - self._fragment = fragment - self._cache = {} - return self - def __init_subclass__(cls): raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") @@ -587,7 +583,7 @@ def _origin(self) -> "URL": netloc = make_netloc(None, None, encoded_host, self.explicit_port) elif not self._path and not self._query and not self._fragment: return self - return self._from_parts(scheme, netloc, "", "", "") + return from_parts(scheme, netloc, "", "", "") def relative(self) -> "URL": """Return a relative part of the URL. @@ -597,7 +593,7 @@ def relative(self) -> "URL": """ if not self._netloc: raise ValueError("URL should be absolute") - return self._from_parts("", "", self._path, self._query, self._fragment) + return from_parts("", "", self._path, self._query, self._fragment) @cached_property def absolute(self) -> bool: @@ -916,12 +912,10 @@ def parent(self) -> "URL": path = self._path if not path or path == "/": if self._fragment or self._query: - return self._from_parts(self._scheme, self._netloc, path, "", "") + return from_parts(self._scheme, self._netloc, path, "", "") return self parts = path.split("/") - return self._from_parts( - self._scheme, self._netloc, "/".join(parts[:-1]), "", "" - ) + return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "") @cached_property def raw_name(self) -> str: @@ -998,13 +992,13 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": parsed.reverse() if not netloc or not needs_normalize: - return self._from_parts(self._scheme, netloc, "/".join(parsed), "", "") + return from_parts(self._scheme, netloc, "/".join(parsed), "", "") path = "/".join(normalize_path_segments(parsed)) # If normalizing the path segments removed the leading slash, add it back. 
if path and path[0] != "/": path = f"/{path}" - return self._from_parts(self._scheme, netloc, path, "", "") + return from_parts(self._scheme, netloc, path, "", "") def with_scheme(self, scheme: str) -> "URL": """Return a new URL with scheme replaced.""" @@ -1019,9 +1013,7 @@ def with_scheme(self, scheme: str) -> "URL": f"relative URLs for the {lower_scheme} scheme" ) raise ValueError(msg) - return self._from_parts( - lower_scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment) def with_user(self, user: Union[str, None]) -> "URL": """Return a new URL with user replaced. @@ -1043,9 +1035,7 @@ def with_user(self, user: Union[str, None]) -> "URL": raise ValueError("user replacement is not allowed for relative URLs") encoded_host = self.host_subcomponent or "" netloc = make_netloc(user, password, encoded_host, self.explicit_port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_password(self, password: Union[str, None]) -> "URL": """Return a new URL with password replaced. @@ -1067,9 +1057,7 @@ def with_password(self, password: Union[str, None]) -> "URL": encoded_host = self.host_subcomponent or "" port = self.explicit_port netloc = make_netloc(self.raw_user, password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_host(self, host: str) -> "URL": """Return a new URL with host replaced. 
@@ -1090,9 +1078,7 @@ def with_host(self, host: str) -> "URL": encoded_host = _encode_host(host, validate_host=True) if host else "" port = self.explicit_port netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_port(self, port: Union[int, None]) -> "URL": """Return a new URL with port replaced. @@ -1110,9 +1096,7 @@ def with_port(self, port: Union[int, None]) -> "URL": raise ValueError("port replacement is not allowed for relative URLs") encoded_host = self.host_subcomponent or "" netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_path( self, @@ -1132,7 +1116,7 @@ def with_path( path = f"/{path}" query = self._query if keep_query else "" fragment = self._fragment if keep_fragment else "" - return self._from_parts(self._scheme, netloc, path, query, fragment) + return from_parts(self._scheme, netloc, path, query, fragment) @overload def with_query(self, query: Query) -> "URL": ... @@ -1155,9 +1139,7 @@ def with_query(self, *args: Any, **kwargs: Any) -> "URL": """ # N.B. doesn't cleanup query/fragment query = get_str_query(*args, **kwargs) or "" - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) @overload def extend_query(self, query: Query) -> "URL": ... 
@@ -1183,9 +1165,7 @@ def extend_query(self, *args: Any, **kwargs: Any) -> "URL": query += new_query if query[-1] == "&" else f"&{new_query}" else: query = new_query - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) @overload def update_query(self, query: Query) -> "URL": ... @@ -1242,9 +1222,7 @@ def update_query(self, *args: Any, **kwargs: Any) -> "URL": "Invalid query type: only str, mapping or " "sequence of (key, value) pairs is allowed" ) - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) def without_query_params(self, *query_params: str) -> "URL": """Remove some keys from query part and return new URL.""" @@ -1276,7 +1254,7 @@ def with_fragment(self, fragment: Union[str, None]) -> "URL": raw_fragment = FRAGMENT_QUOTER(fragment) if self._fragment == raw_fragment: return self - return self._from_parts( + return from_parts( self._scheme, self._netloc, self._path, self._query, raw_fragment ) @@ -1316,7 +1294,7 @@ def with_name( query = self._query if keep_query else "" fragment = self._fragment if keep_fragment else "" - return self._from_parts(self._scheme, netloc, "/".join(parts), query, fragment) + return from_parts(self._scheme, netloc, "/".join(parts), query, fragment) def with_suffix( self, @@ -1364,9 +1342,7 @@ def join(self, url: "URL") -> "URL": # scheme is in uses_authority as uses_authority is a superset of uses_relative if (join_netloc := url._netloc) and scheme in USES_AUTHORITY: - return self._from_parts( - scheme, join_netloc, url._path, url._query, url._fragment - ) + return from_parts(scheme, join_netloc, url._path, url._query, url._fragment) orig_path = self._path if join_path := url._path: @@ -1389,16 +1365,13 @@ def join(self, url: "URL") -> "URL": else: path = orig_path - new_url = 
object.__new__(URL) - new_url._scheme = scheme - new_url._netloc = self._netloc - new_url._path = path - new_url._query = url._query if join_path or url._query else self._query - new_url._fragment = ( - url._fragment if join_path or url._fragment else self._fragment + return from_parts( + scheme, + self._netloc, + path, + url._query if join_path or url._query else self._query, + url._fragment if join_path or url._fragment else self._fragment, ) - new_url._cache = {} - return new_url def joinpath(self, *other: str, encoded: bool = False) -> "URL": """Return a new URL with the elements in other appended to the path."""