From 2edbe412cf47f9d3dadd61bbc08a5cd427747b25 Mon Sep 17 00:00:00 2001 From: Martijn Pieters Date: Tue, 29 Mar 2022 15:01:02 +0100 Subject: [PATCH] Add URL.joinpath(*elements, encoded=False) to build a URL with multiple new path elements This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`. --- CHANGES/704.feature.rst | 1 + docs/api.rst | 24 +++++++++++ tests/test_url.py | 95 +++++++++++++++++++++++++++++++++++++++++ yarl/__init__.pyi | 1 + yarl/_url.py | 62 ++++++++++++++++++--------- 5 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 CHANGES/704.feature.rst diff --git a/CHANGES/704.feature.rst b/CHANGES/704.feature.rst new file mode 100644 index 000000000..686b24fb7 --- /dev/null +++ b/CHANGES/704.feature.rst @@ -0,0 +1 @@ +Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements. diff --git a/docs/api.rst b/docs/api.rst index b850e0c5d..88c5527db 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -817,6 +817,30 @@ The path is encoded if needed. >>> url URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') +.. method:: URL.joinpath(*other, encoded=False) + + Construct a new URL with all ``other`` elements appended to + *path*, and cleaned up *query* and *fragment* parts. + + Passing ``encoded=True`` parameter prevents path element auto-encoding, user is + responsible for taking care of URL correctness. + + .. doctest:: + + >>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath') + >>> url + URL('http://example.com/path/to/subpath') + >>> url.parts + ('/', 'path', 'to', 'subpath') + >>> url = URL('http://example.com/path?arg#frag').joinpath('сюда') + >>> url + URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') + >>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True) + >>> url + URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') + + .. versionadded:: 1.9 + .. 
method:: URL.join(url) Construct a full (“absolute”) URL by combining a “base URL” diff --git a/tests/test_url.py b/tests/test_url.py index 2d4f1cba7..b8d2591b0 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -767,6 +767,101 @@ def test_div_with_dots(): assert url.raw_path == "/path/to" +# joinpath + + +def test_joinpath_root(): + url = URL("http://example.com") + assert str(url.joinpath("path", "to")) == "http://example.com/path/to" + + +def test_joinpath_root_with_slash(): + url = URL("http://example.com/") + assert str(url.joinpath("path", "to")) == "http://example.com/path/to" + + +def test_joinpath(): + url = URL("http://example.com/path") + assert str(url.joinpath("to")) == "http://example.com/path/to" + + +def test_joinpath_with_slash(): + url = URL("http://example.com/path/") + assert str(url.joinpath("to")) == "http://example.com/path/to" + + +def test_joinpath_path_starting_from_slash_is_forbidden(): + url = URL("http://example.com/path/") + with pytest.raises(ValueError): + assert url.joinpath("/to/others") + + +def test_joinpath_cleanup_query_and_fragment(): + url = URL("http://example.com/path?a=1#frag") + assert str(url.joinpath("to")) == "http://example.com/path/to" + + +def test_joinpath_for_empty_url(): + url = URL().joinpath("a") + assert url.raw_parts == ("a",) + + +def test_joinpath_for_relative_url(): + url = URL("a").joinpath("b") + assert url.raw_parts == ("a", "b") + + +def test_joinpath_with_empty_element(): + url = URL("a").joinpath("b", "", "c") + assert url.raw_parts == ("a", "b", "c") + + +def test_joinpath_for_relative_url_started_with_slash(): + url = URL("/a").joinpath("b") + assert url.raw_parts == ("/", "a", "b") + + +def test_joinpath_non_ascii(): + url = URL("http://example.com/сюда") + url2 = url.joinpath("туда") + assert url2.path == "/сюда/туда" + assert url2.raw_path == "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0" + assert url2.parts == ("/", "сюда", "туда") + assert url2.raw_parts == ( + "/", + 
"%D1%81%D1%8E%D0%B4%D0%B0", + "%D1%82%D1%83%D0%B4%D0%B0", + ) + + +def test_joinpath_percent_encoded(): + url = URL("http://example.com/path") + url2 = url.joinpath("%cf%80") + assert url2.path == "/path/%cf%80" + assert url2.raw_path == "/path/%25cf%2580" + assert url2.parts == ("/", "path", "%cf%80") + assert url2.raw_parts == ("/", "path", "%25cf%2580") + + +def test_joinpath_encoded_percent_encoded(): + url = URL("http://example.com/path") + url2 = url.joinpath("%cf%80", encoded=True) + assert url2.path == "/path/π" + assert url2.raw_path == "/path/%cf%80" + assert url2.parts == ("/", "path", "π") + assert url2.raw_parts == ("/", "path", "%cf%80") + + +def test_joinpath_with_colon_and_at(): + url = URL("http://example.com/base").joinpath("path:abc@123") + assert url.raw_path == "/base/path:abc@123" + + +def test_joinpath_with_dots(): + url = URL("http://example.com/base").joinpath("..", "path", ".", "to") + assert url.raw_path == "/path/to" + + # with_path diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi index fc761b5c2..dac481d46 100644 --- a/yarl/__init__.pyi +++ b/yarl/__init__.pyi @@ -96,6 +96,7 @@ class URL: def with_name(self, name: str) -> URL: ... def with_suffix(self, suffix: str) -> URL: ... def join(self, url: URL) -> URL: ... + def joinpath(self, *url: str) -> URL: ... def human_repr(self) -> str: ... 
# private API @classmethod diff --git a/yarl/_url.py b/yarl/_url.py index dfcff2ef3..78267c478 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -315,25 +315,7 @@ def __gt__(self, other): return self._val > other._val def __truediv__(self, name): - name = self._PATH_QUOTER(name) - if name.startswith("/"): - raise ValueError( - f"Appending path {name!r} starting from slash is forbidden" - ) - path = self._val.path - if path == "/": - new_path = "/" + name - elif not path and not self.is_absolute(): - new_path = name - else: - parts = path.rstrip("/").split("/") - parts.append(name) - new_path = "/".join(parts) - if self.is_absolute(): - new_path = self._normalize_path(new_path) - return URL( - self._val._replace(path=new_path, query="", fragment=""), encoded=True - ) + return self._make_child((name,)) def __mod__(self, query): return self.update_query(query) @@ -701,11 +683,45 @@ def _validate_authority_uri_abs_path(host, path): "Path in a URL with authority should start with a slash ('/') if set" ) + def _make_child(self, segments, encoded=False): + # add segments to self._val.path, accounting for absolute vs relative paths + parsed = [] + for seg in reversed(segments): + if not seg: + continue + if seg[0] == "/": + raise ValueError( + f"Appending path {seg!r} starting from slash is forbidden" + ) + seg = seg if encoded else self._PATH_QUOTER(seg) + if "/" in seg: + parsed += ( + sub for sub in reversed(seg.split("/")) if sub and sub != "." + ) + elif seg != ".": + parsed.append(seg) + parsed.reverse() + old_path = self._val.path + if old_path: + parsed = [*old_path.rstrip("/").split("/"), *parsed] + if self.is_absolute(): + parsed = self._normalize_path_segments(parsed) + new_path = "/".join(parsed) + return URL( + self._val._replace(path=new_path, query="", fragment=""), encoded=True + ) + @classmethod def _normalize_path(cls, path): - # Drop '.' and '..' from path + # Drop '.' and '..' 
from str path segments = path.split("/") + return "/".join(cls._normalize_path_segments(segments)) + + @classmethod + def _normalize_path_segments(cls, segments): + # Drop '.' and '..' from a sequence of str segments + resolved_path = [] for seg in segments: @@ -728,7 +744,7 @@ def _normalize_path(cls, path): # then we need to append the trailing '/' resolved_path.append("") - return "/".join(resolved_path) + return resolved_path @classmethod def _encode_host(cls, host, human=False): @@ -1077,6 +1093,10 @@ def join(self, url): raise TypeError("url should be URL") return URL(urljoin(str(self), str(url)), encoded=True) + def joinpath(self, *other, encoded=False): + """Return a new URL with the elements in other appended to the path.""" + return self._make_child(other, encoded=encoded) + def human_repr(self): """Return decoded human readable string for URL representation.""" user = _human_quote(self.user, "#/:?@")