From 0799de333f5852a41ab4c4a662f255e14a7214ae Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 29 Nov 2024 11:39:06 -0600 Subject: [PATCH 1/4] DNM: Experiment with making from_parts an LRU --- yarl/_url.py | 112 ++++++++++++++++++--------------------------------- 1 file changed, 39 insertions(+), 73 deletions(-) diff --git a/yarl/_url.py b/yarl/_url.py index 4bf3c2dd..db331c01 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -213,6 +213,19 @@ def pre_encoded_url(url_str: str) -> "URL": return self +@lru_cache +def from_parts(scheme: str, netloc: str, path: str, query: str, fragment: str) -> "URL": + """Create a new URL from parts.""" + self = object.__new__(URL) + self._scheme = scheme + self._netloc = netloc + self._path = path + self._query = query + self._fragment = fragment + self._cache = {} + return self + + @rewrite_module class URL: # Don't derive from str @@ -308,10 +321,7 @@ def __new__( if type(val) is SplitResult: if not encoded: raise ValueError("Cannot apply decoding to SplitResult") - self = object.__new__(URL) - self._scheme, self._netloc, self._path, self._query, self._fragment = val - self._cache = {} - return self + return from_parts(*val) if isinstance(val, str): return pre_encoded_url(str(val)) if encoded else encode_url(str(val)) if val is UNDEFINED: @@ -412,28 +422,7 @@ def build( if query: query_string = get_str_query(query) or "" - url = object.__new__(cls) - url._scheme = scheme - url._netloc = netloc - url._path = path - url._query = query_string - url._fragment = fragment - url._cache = {} - return url - - @classmethod - def _from_parts( - cls, scheme: str, netloc: str, path: str, query: str, fragment: str - ) -> "URL": - """Create a new URL from parts.""" - self = object.__new__(cls) - self._scheme = scheme - self._netloc = netloc - self._path = path - self._query = query - self._fragment = fragment - self._cache = {} - return self + return from_parts(scheme, netloc, path, query_string, fragment) def __init_subclass__(cls): raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") @@ -587,7 +576,7 @@ def _origin(self) -> "URL": netloc = make_netloc(None, None, encoded_host, self.explicit_port) elif not self._path and not self._query and not self._fragment: return self - return self._from_parts(scheme, netloc, "", "", "") + return from_parts(scheme, netloc, "", "", "") def relative(self) -> "URL": """Return a relative part of the URL. @@ -597,7 +586,7 @@ def relative(self) -> "URL": """ if not self._netloc: raise ValueError("URL should be absolute") - return self._from_parts("", "", self._path, self._query, self._fragment) + return from_parts("", "", self._path, self._query, self._fragment) @cached_property def absolute(self) -> bool: @@ -916,12 +905,10 @@ def parent(self) -> "URL": path = self._path if not path or path == "/": if self._fragment or self._query: - return self._from_parts(self._scheme, self._netloc, path, "", "") + return from_parts(self._scheme, self._netloc, path, "", "") return self parts = path.split("/") - return self._from_parts( - self._scheme, self._netloc, "/".join(parts[:-1]), "", "" - ) + return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "") @cached_property def raw_name(self) -> str: @@ -998,13 +985,13 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": parsed.reverse() if not netloc or not needs_normalize: - return self._from_parts(self._scheme, netloc, "/".join(parsed), "", "") + return from_parts(self._scheme, netloc, "/".join(parsed), "", "") path = "/".join(normalize_path_segments(parsed)) # If normalizing the path segments removed the leading slash, add it back. if path and path[0] != "/": path = f"/{path}" - return self._from_parts(self._scheme, netloc, path, "", "") + return from_parts(self._scheme, netloc, path, "", "") def with_scheme(self, scheme: str) -> "URL": """Return a new URL with scheme replaced.""" @@ -1019,9 +1006,7 @@ def with_scheme(self, scheme: str) -> "URL": f"relative URLs for the {lower_scheme} scheme" ) raise ValueError(msg) - return self._from_parts( - lower_scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment) def with_user(self, user: Union[str, None]) -> "URL": """Return a new URL with user replaced. @@ -1043,9 +1028,7 @@ def with_user(self, user: Union[str, None]) -> "URL": raise ValueError("user replacement is not allowed for relative URLs") encoded_host = self.host_subcomponent or "" netloc = make_netloc(user, password, encoded_host, self.explicit_port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_password(self, password: Union[str, None]) -> "URL": """Return a new URL with password replaced. @@ -1067,9 +1050,7 @@ def with_password(self, password: Union[str, None]) -> "URL": encoded_host = self.host_subcomponent or "" port = self.explicit_port netloc = make_netloc(self.raw_user, password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_host(self, host: str) -> "URL": """Return a new URL with host replaced. @@ -1090,9 +1071,7 @@ def with_host(self, host: str) -> "URL": encoded_host = _encode_host(host, validate_host=True) if host else "" port = self.explicit_port netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_port(self, port: Union[int, None]) -> "URL": """Return a new URL with port replaced. @@ -1110,9 +1089,7 @@ def with_port(self, port: Union[int, None]) -> "URL": raise ValueError("port replacement is not allowed for relative URLs") encoded_host = self.host_subcomponent or "" netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port) - return self._from_parts( - self._scheme, netloc, self._path, self._query, self._fragment - ) + return from_parts(self._scheme, netloc, self._path, self._query, self._fragment) def with_path( self, @@ -1132,7 +1109,7 @@ def with_path( path = f"/{path}" query = self._query if keep_query else "" fragment = self._fragment if keep_fragment else "" - return self._from_parts(self._scheme, netloc, path, query, fragment) + return from_parts(self._scheme, netloc, path, query, fragment) @overload def with_query(self, query: Query) -> "URL": ... @@ -1155,9 +1132,7 @@ def with_query(self, *args: Any, **kwargs: Any) -> "URL": """ # N.B. doesn't cleanup query/fragment query = get_str_query(*args, **kwargs) or "" - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) @overload def extend_query(self, query: Query) -> "URL": ... @@ -1183,9 +1158,7 @@ def extend_query(self, *args: Any, **kwargs: Any) -> "URL": query += new_query if query[-1] == "&" else f"&{new_query}" else: query = new_query - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) @overload def update_query(self, query: Query) -> "URL": ... @@ -1242,9 +1215,7 @@ def update_query(self, *args: Any, **kwargs: Any) -> "URL": "Invalid query type: only str, mapping or " "sequence of (key, value) pairs is allowed" ) - return self._from_parts( - self._scheme, self._netloc, self._path, query, self._fragment - ) + return from_parts(self._scheme, self._netloc, self._path, query, self._fragment) def without_query_params(self, *query_params: str) -> "URL": """Remove some keys from query part and return new URL.""" @@ -1276,7 +1247,7 @@ def with_fragment(self, fragment: Union[str, None]) -> "URL": raw_fragment = FRAGMENT_QUOTER(fragment) if self._fragment == raw_fragment: return self - return self._from_parts( + return from_parts( self._scheme, self._netloc, self._path, self._query, raw_fragment ) @@ -1316,7 +1287,7 @@ def with_name( query = self._query if keep_query else "" fragment = self._fragment if keep_fragment else "" - return self._from_parts(self._scheme, netloc, "/".join(parts), query, fragment) + return from_parts(self._scheme, netloc, "/".join(parts), query, fragment) def with_suffix( self, @@ -1364,9 +1335,7 @@ def join(self, url: "URL") -> "URL": # scheme is in uses_authority as uses_authority is a superset of uses_relative if (join_netloc := url._netloc) and scheme in USES_AUTHORITY: - return self._from_parts( - scheme, join_netloc, url._path, url._query, url._fragment - ) + return from_parts(scheme, join_netloc, url._path, url._query, url._fragment) orig_path = self._path if join_path := url._path: @@ -1389,16 +1358,13 @@ def join(self, url: "URL") -> "URL": else: path = orig_path - new_url = object.__new__(URL) - new_url._scheme = scheme - new_url._netloc = self._netloc - new_url._path = path - new_url._query = url._query if join_path or url._query else self._query - new_url._fragment = ( - url._fragment if join_path or url._fragment else self._fragment + return from_parts( + scheme, + self._netloc, + path, + url._query if join_path or url._query else self._query, + url._fragment if join_path or url._fragment else self._fragment, ) - new_url._cache = {} - return new_url def joinpath(self, *other: str, encoded: bool = False) -> "URL": """Return a new URL with the elements in other appended to the path.""" From 18ba56e3f44de5579b0ab683c83f44efaa117611 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 29 Nov 2024 11:57:21 -0600 Subject: [PATCH 2/4] revert changes to build --- yarl/_url.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index db331c01..0bf68597 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -422,7 +422,14 @@ def build( if query: query_string = get_str_query(query) or "" - return from_parts(scheme, netloc, path, query_string, fragment) + url = object.__new__(cls) + url._scheme = scheme + url._netloc = netloc + url._path = path + url._query = query_string + url._fragment = fragment + url._cache = {} + return url def __init_subclass__(cls): raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") From 3d2ec3e38bf3a3adfea164523c64860cb128fdcc Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 29 Nov 2024 12:09:29 -0600 Subject: [PATCH 3/4] changelog --- CHANGES/1434.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/1434.misc.rst diff --git a/CHANGES/1434.misc.rst b/CHANGES/1434.misc.rst new file mode 100644 index 00000000..ec651202 --- /dev/null +++ b/CHANGES/1434.misc.rst @@ -0,0 +1 @@ +Improved cache performance for operations that modify the :class:`~yarl.URL` object -- by :user:`bdraco`. From ca7280864aa125434f871ddc922a393c308be31d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 29 Nov 2024 12:12:28 -0600 Subject: [PATCH 4/4] Update CHANGES/1434.misc.rst --- CHANGES/1434.misc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/1434.misc.rst b/CHANGES/1434.misc.rst index ec651202..89a6993c 100644 --- a/CHANGES/1434.misc.rst +++ b/CHANGES/1434.misc.rst @@ -1 +1 @@ -Improved cache performance for operations that modify the :class:`~yarl.URL` object -- by :user:`bdraco`. +Improved cache performance for operations that produce a new :class:`~yarl.URL` object -- by :user:`bdraco`.