From ccc4d34c360160a9bd165879a422edae7ab78e30 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 25 Oct 2024 02:20:40 -1000 Subject: [PATCH 1/3] Revert URL subtraction It appears we merged the URL subtraction a bit too soon, and we need to rework the approach. I know have ~20 hours into it trying to solve the issues. The conclusion is that it is not ready to ship and we need a new approach. To ensure it does not go out before its ready, revert the original PR and the attempts to solve the issues with it so we can start fresh. References --- CHANGES/1340.feature.rst | 22 ---------- docs/api.rst | 15 ------- tests/test_url.py | 83 ------------------------------------ tests/test_url_benchmarks.py | 14 ------ yarl/_path.py | 30 ------------- yarl/_url.py | 30 +------------ 6 files changed, 1 insertion(+), 193 deletions(-) delete mode 100644 CHANGES/1340.feature.rst diff --git a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst deleted file mode 100644 index c879569a0..000000000 --- a/CHANGES/1340.feature.rst +++ /dev/null @@ -1,22 +0,0 @@ -Added support for using the :meth:`subtraction operator ` -to get the relative path between URLs. - -Note that both URLs must have the same scheme, user, password, host and port: - -.. code-block:: pycon - - >>> target = URL("http://example.com/path/index.html") - >>> base = URL("http://example.com/") - >>> target - base - URL('path/index.html') - -URLs can also be relative: - -.. code-block:: pycon - - >>> target = URL("/") - >>> base = URL("/path/index.html") - >>> target - base - URL('..') - --- by :user:`oleksbabieiev`. diff --git a/docs/api.rst b/docs/api.rst index 071cff7e5..f9698fb37 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -997,21 +997,6 @@ The path is encoded if needed. >>> base.join(URL('//python.org/page.html')) URL('http://python.org/page.html') -The subtraction (``-``) operator creates a new URL with -a relative *path* to the target URL from the given base URL. -*scheme*, *user*, *password*, *host*, *port*, *query* and *fragment* are removed. - -.. method:: URL.__sub__(url) - - Returns a new URL with a relative *path* between two other URL objects. - - .. doctest:: - - >>> target = URL('http://example.com/path/index.html') - >>> base = URL('http://example.com/') - >>> target - base - URL('path/index.html') - Human readable representation ----------------------------- diff --git a/tests/test_url.py b/tests/test_url.py index 444a9f565..73a3a4a2e 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -61,89 +61,6 @@ def test_str(): assert str(url) == "http://example.com:8888/path/to?a=1&b=2" -@pytest.mark.parametrize( - ("target", "base", "expected"), - [ - ("http://example.com/path/to", "http://example.com/", "path/to"), - ("http://example.com/path/to", "http://example.com/spam", "path/to"), - ("http://example.com/this/is/a/test", "http://example.com/this/", "is/a/test"), - ( - "http://example.com/this/./is/a/test", - "http://example.com/this/", - "is/a/test", - ), - ("http://example.com/this/is/../a//test", "http://example.com/this/", "a/test"), - ("http://example.com/path/to", "http://example.com/spam/", "../path/to"), - ("http://example.com/path", "http://example.com/path/to/", ".."), - ("http://example.com/path", "http://example.com/other/../path/to/", ".."), - ("http://example.com/", "http://example.com/", "."), - ("http://example.com", "http://example.com", "."), - ("http://example.com/", "http://example.com", "."), - ("http://example.com", "http://example.com/", "."), - ("//example.com", "//example.com", "."), - ("/path/to", "/spam/", "../path/to"), - ("path/to", "spam/", "../path/to"), - ("path/../to", "path/", "../to"), - ("path/..", ".", "path/.."), - ("path/../replace/me", "path/../replace", "replace/me"), - ("path/../replace/me", "path/../replace/", "me"), - ("path/to", "spam", "path/to"), - ("..", ".", ".."), - (".", "..", "."), - ], -) -def test_sub(target: str, base: str, expected: str): - expected_url = URL(expected) - result_url = URL(target) - URL(base) - assert result_url == expected_url - - -@pytest.mark.xfail(reason="Empty segments are not preserved") -@pytest.mark.parametrize( - ("target", "base", "expected"), - [ - ( - "http://example.com/////path/////to", - "http://example.com/////spam", - "path/////to", - ), - ( - "http://example.com////path/////to", - "http://example.com/////spam", - "..//path/////to", - ), - ], -) -def test_sub_empty_segments(target: str, base: str, expected: str): - expected_url = URL(expected) - result_url = URL(target) - URL(base) - assert result_url == expected_url - - -def test_sub_with_different_schemes(): - expected_error_msg = "Both URLs should have the same scheme" - with pytest.raises(ValueError, match=expected_error_msg): - URL("http://example.com/") - URL("https://example.com/") - - -def test_sub_with_different_netlocs(): - expected_error_msg = "Both URLs should have the same netloc" - with pytest.raises(ValueError, match=expected_error_msg): - URL("https://spam.com/") - URL("https://ham.com/") - - -def test_sub_with_different_anchors(): - expected_error_msg = "'path/to' and '/path' have different anchors" - with pytest.raises(ValueError, match=expected_error_msg): - URL("path/to") - URL("/path/from") - - -def test_sub_with_two_dots_in_base(): - expected_error_msg = "'..' segment in '/path/..' cannot be walked" - with pytest.raises(ValueError, match=expected_error_msg): - URL("path/to") - URL("/path/../from") - - def test_repr(): url = URL("http://example.com") assert "URL('http://example.com')" == repr(url) diff --git a/tests/test_url_benchmarks.py b/tests/test_url_benchmarks.py index d5737ef4b..762db715b 100644 --- a/tests/test_url_benchmarks.py +++ b/tests/test_url_benchmarks.py @@ -589,20 +589,6 @@ def _run() -> None: url.query -def test_url_subtract(benchmark: BenchmarkFixture) -> None: - @benchmark - def _run() -> None: - for _ in range(100): - URL_WITH_LONGER_PATH - URL_WITH_PATH - - -def test_url_subtract_long_urls(benchmark: BenchmarkFixture) -> None: - @benchmark - def _run() -> None: - for _ in range(100): - URL_VERY_LONG_PATH - URL_LONG_PATH - - def test_url_host_port_subcomponent(benchmark: BenchmarkFixture) -> None: cache_non_default = URL_WITH_NOT_DEFAULT_PORT._cache cache = BASE_URL._cache diff --git a/yarl/_path.py b/yarl/_path.py index 838aa94cb..c22f0b4b8 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -2,8 +2,6 @@ from collections.abc import Sequence from contextlib import suppress -from itertools import chain -from pathlib import PurePosixPath def normalize_path_segments(segments: Sequence[str]) -> list[str]: @@ -41,31 +39,3 @@ def normalize_path(path: str) -> str: segments = path.split("/") return prefix + "/".join(normalize_path_segments(segments)) - - -def calculate_relative_path(target: str, base: str) -> str: - """Return the relative path between two other paths. - - If the operation is not possible, raise ValueError. - """ - - target = target or "/" - base = base or "/" - - target_path = PurePosixPath(target) - base_path = PurePosixPath(base) - - if base[-1] != "/": - base_path = base_path.parent - - for step, path in enumerate(chain((base_path,), base_path.parents)): - if path == target_path or path in target_path.parents: - break - elif path.name == "..": - raise ValueError(f"'..' segment in {str(base_path)!r} cannot be walked") - else: - raise ValueError( - f"{str(target_path)!r} and {str(base_path)!r} have different anchors" - ) - offset = len(path.parts) - return str(PurePosixPath(*("..",) * step, *target_path.parts[offset:])) diff --git a/yarl/_url.py b/yarl/_url.py index 1766f305c..95baac81a 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -12,7 +12,7 @@ from propcache.api import under_cached_property as cached_property from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result -from ._path import calculate_relative_path, normalize_path, normalize_path_segments +from ._path import normalize_path, normalize_path_segments from ._query import ( Query, QueryVariable, @@ -477,34 +477,6 @@ def __truediv__(self, name: str) -> "URL": return NotImplemented return self._make_child((str(name),)) - def __sub__(self, other: object) -> "URL": - """Return a new URL with a relative path between two other URL objects. - - Note that both URLs must have the same scheme and netloc. - The new relative URL has only path: - scheme, user, password, host, port, query and fragment are removed. - - Example: - >>> target = URL("http://example.com/path/index.html") - >>> base = URL("http://example.com/") - >>> target - base - URL('path/index.html') - """ - - if type(other) is not URL: - return NotImplemented - - target_scheme, target_netloc, target_path, _, _ = self._val - base_scheme, base_netloc, base_path, _, _ = other._val - - if target_scheme != base_scheme: - raise ValueError("Both URLs should have the same scheme") - if target_netloc != base_netloc: - raise ValueError("Both URLs should have the same netloc") - - path = calculate_relative_path(target_path, base_path) - return self._from_tup(("", "", path, "", "")) - def __mod__(self, query: Query) -> "URL": return self.update_query(query) From 4753a7d268ea188ae672228ec6ae8f95ef3e84e5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 25 Oct 2024 02:26:26 -1000 Subject: [PATCH 2/3] cleanups --- CHANGES/1379.feature.rst | 1 - CHANGES/1382.feature.rst | 1 - 2 files changed, 2 deletions(-) delete mode 120000 CHANGES/1379.feature.rst delete mode 120000 CHANGES/1382.feature.rst diff --git a/CHANGES/1379.feature.rst b/CHANGES/1379.feature.rst deleted file mode 120000 index 62a3dbc9b..000000000 --- a/CHANGES/1379.feature.rst +++ /dev/null @@ -1 +0,0 @@ -1340.feature.rst \ No newline at end of file diff --git a/CHANGES/1382.feature.rst b/CHANGES/1382.feature.rst deleted file mode 120000 index 62a3dbc9b..000000000 --- a/CHANGES/1382.feature.rst +++ /dev/null @@ -1 +0,0 @@ -1340.feature.rst \ No newline at end of file From 402a8b0dfe727f0baef8a5cb4b7948205f9a182d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 25 Oct 2024 02:48:35 -1000 Subject: [PATCH 3/3] some missed constants need to be reverted as well --- tests/test_url_benchmarks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_url_benchmarks.py b/tests/test_url_benchmarks.py index 762db715b..7e9a9df79 100644 --- a/tests/test_url_benchmarks.py +++ b/tests/test_url_benchmarks.py @@ -18,14 +18,11 @@ QUERY_URL = URL(QUERY_URL_STR) URL_WITH_PATH_STR = "http://www.domain.tld/req" URL_WITH_PATH = URL(URL_WITH_PATH_STR) -URL_WITH_LONGER_PATH = URL("http://www.domain.tld/req/req/req") REL_URL = URL("/req") QUERY_SEQ = {str(i): tuple(str(j) for j in range(10)) for i in range(10)} SIMPLE_QUERY = {str(i): str(i) for i in range(10)} SIMPLE_INT_QUERY = {str(i): i for i in range(10)} QUERY_STRING = "x=y&z=1" -URL_VERY_LONG_PATH = URL("http://www.domain.tld/" + "req/" * 100) -URL_LONG_PATH = URL("http://www.domain.tld/" + "req/" * 30) class _SubClassedStr(str):