diff --git a/CHANGES/704.feature.rst b/CHANGES/704.feature.rst new file mode 100644 index 000000000..686b24fb7 --- /dev/null +++ b/CHANGES/704.feature.rst @@ -0,0 +1 @@ +Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements. diff --git a/docs/api.rst b/docs/api.rst index b850e0c5d..b2ddf8421 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -817,6 +817,30 @@ The path is encoded if needed. >>> url URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') +.. method:: URL.joinpath(*other, encoded=False) + + Construct a new URL with all ``other`` elements appended to + *path*, and cleaned up *query* and *fragment* parts. + + Passing ``encoded=True`` parameter prevents path element auto-encoding; the caller is + responsible for taking care of URL correctness. + + .. doctest:: + + >>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath') + >>> url + URL('http://example.com/path/to/subpath') + >>> url.parts + ('/', 'path', 'to', 'subpath') + >>> url = URL('http://example.com/path?arg#frag').joinpath('сюда') + >>> url + URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') + >>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True) + >>> url + URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0') + + .. versionadded:: 1.9 + .. 
method:: URL.join(url) Construct a full (“absolute”) URL by combining a “base URL” diff --git a/tests/test_url.py b/tests/test_url.py index 2d4f1cba7..65f1a83e5 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -767,6 +767,111 @@ def test_div_with_dots(): assert url.raw_path == "/path/to" +# joinpath + + +@pytest.mark.parametrize( + "base,to_join,expected", + [ + pytest.param("", ("path", "to"), "http://example.com/path/to", id="root"), + pytest.param( + "/", ("path", "to"), "http://example.com/path/to", id="root-with-slash" + ), + pytest.param("/path", ("to",), "http://example.com/path/to", id="path"), + pytest.param( + "/path/", ("to",), "http://example.com/path/to", id="path-with-slash" + ), + pytest.param( + "/path?a=1#frag", + ("to",), + "http://example.com/path/to", + id="cleanup-query-and-fragment", + ), + ], +) +def test_joinpath(base, to_join, expected): + url = URL(f"http://example.com{base}") + assert str(url.joinpath(*to_join)) == expected + + +@pytest.mark.parametrize( + "url,to_join,expected", + [ + pytest.param(URL(), ("a",), ("a",), id="empty-url"), + pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"), + pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"), + pytest.param(URL("/a"), ("b",), ("/", "a", "b"), id="absolute-path"), + ], +) +def test_joinpath_relative(url, to_join, expected): + assert url.joinpath(*to_join).raw_parts == expected + + +@pytest.mark.parametrize( + "url,to_join,encoded,e_path,e_raw_path,e_parts,e_raw_parts", + [ + pytest.param( + "http://example.com/сюда", + ("туда",), + False, + "/сюда/туда", + "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0", + ("/", "сюда", "туда"), + ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"), + id="non-ascii", + ), + pytest.param( + "http://example.com/path", + ("%cf%80",), + False, + "/path/%cf%80", + "/path/%25cf%2580", + ("/", "path", "%cf%80"), + ("/", "path", "%25cf%2580"), + id="percent-encoded", + ), + pytest.param( + 
"http://example.com/path", + ("%cf%80",), + True, + "/path/π", + "/path/%cf%80", + ("/", "path", "π"), + ("/", "path", "%cf%80"), + id="encoded-percent-encoded", + ), + ], +) +def test_joinpath_encoding( + url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts +): + joined = URL(url).joinpath(*to_join, encoded=encoded) + assert joined.path == e_path + assert joined.raw_path == e_raw_path + assert joined.parts == e_parts + assert joined.raw_parts == e_raw_parts + + +@pytest.mark.parametrize( + "to_join,expected", + [ + pytest.param(("path:abc@123",), "/base/path:abc@123", id="with-colon-and-at"), + pytest.param(("..", "path", ".", "to"), "/path/to", id="with-dots"), + ], +) +def test_joinpath_edgecases(to_join, expected): + url = URL("http://example.com/base").joinpath(*to_join) + assert url.raw_path == expected + + +def test_joinpath_path_starting_from_slash_is_forbidden(): + url = URL("http://example.com/path/") + with pytest.raises( + ValueError, match="Appending path .* starting from slash is forbidden" + ): + assert url.joinpath("/to/others") + + # with_path diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi index fc761b5c2..dac481d46 100644 --- a/yarl/__init__.pyi +++ b/yarl/__init__.pyi @@ -96,6 +96,7 @@ class URL: def with_name(self, name: str) -> URL: ... def with_suffix(self, suffix: str) -> URL: ... def join(self, url: URL) -> URL: ... + def joinpath(self, *url: str, encoded: bool = ...) -> URL: ... def human_repr(self) -> str: ... # private API @classmethod diff --git a/yarl/_url.py b/yarl/_url.py index e86b4b10c..39fcfa194 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -52,6 +52,30 @@ def __set__(self, inst, value): raise AttributeError("cached property is read-only") +def _normalize_path_segments(segments): + """Drop '.' and '..' from a sequence of str segments""" + + resolved_path = [] + + for seg in segments: + if seg == "..": + # ignore any .. 
segments that would otherwise cause an + # IndexError when popped from resolved_path if + # resolving for rfc3986 + with suppress(IndexError): + resolved_path.pop() + elif seg != ".": + resolved_path.append(seg) + + if segments and segments[-1] in (".", ".."): + # do some post-processing here. + # if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append("") + + return resolved_path + + @rewrite_module class URL: # Don't derive from str @@ -316,25 +340,7 @@ def __gt__(self, other): return self._val > other._val def __truediv__(self, name): - name = self._PATH_QUOTER(name) - if name.startswith("/"): - raise ValueError( - f"Appending path {name!r} starting from slash is forbidden" - ) - path = self._val.path - if path == "/": - new_path = "/" + name - elif not path and not self.is_absolute(): - new_path = name - else: - parts = path.rstrip("/").split("/") - parts.append(name) - new_path = "/".join(parts) - if self.is_absolute(): - new_path = self._normalize_path(new_path) - return URL( - self._val._replace(path=new_path, query="", fragment=""), encoded=True - ) + return self._make_child((name,)) def __mod__(self, query): return self.update_query(query) @@ -702,9 +708,38 @@ def _validate_authority_uri_abs_path(host, path): "Path in a URL with authority should start with a slash ('/') if set" ) + def _make_child(self, segments, encoded=False): + """add segments to self._val.path, accounting for absolute vs relative paths""" + # keep the trailing slash if the last segment ends with one (empty sub-segments are dropped below) + parsed = [""] if segments and segments[-1].endswith("/") else [] + for seg in reversed(segments): + if not seg: + continue + if seg[0] == "/": + raise ValueError( + f"Appending path {seg!r} starting from slash is forbidden" + ) + seg = seg if encoded else self._PATH_QUOTER(seg) + if "/" in seg: + parsed += ( + sub for sub in reversed(seg.split("/")) if sub and sub != "." 
+ ) + elif seg != ".": + parsed.append(seg) + parsed.reverse() + old_path = self._val.path + if old_path: + parsed = [*old_path.rstrip("/").split("/"), *parsed] + if self.is_absolute(): + parsed = _normalize_path_segments(parsed) + new_path = "/".join(parsed) + return URL( + self._val._replace(path=new_path, query="", fragment=""), encoded=True + ) + @classmethod def _normalize_path(cls, path): - # Drop '.' and '..' from path + # Drop '.' and '..' from str path prefix = "" if path.startswith("/"): @@ -714,25 +749,7 @@ def _normalize_path(cls, path): path = path[1:] segments = path.split("/") - resolved_path = [] - - for seg in segments: - if seg == "..": - # ignore any .. segments that would otherwise cause an - # IndexError when popped from resolved_path if - # resolving for rfc3986 - with suppress(IndexError): - resolved_path.pop() - elif seg != ".": - resolved_path.append(seg) - - if segments and segments[-1] in (".", ".."): - # do some post-processing here. - # if the last segment was a relative dir, - # then we need to append the trailing '/' - resolved_path.append("") - - return prefix + "/".join(resolved_path) + return prefix + "/".join(_normalize_path_segments(segments)) @classmethod def _encode_host(cls, host, human=False): @@ -1086,6 +1103,10 @@ def join(self, url): raise TypeError("url should be URL") return URL(urljoin(str(self), str(url)), encoded=True) + def joinpath(self, *other, encoded=False): + """Return a new URL with the elements in other appended to the path.""" + return self._make_child(other, encoded=encoded) + def human_repr(self): """Return decoded human readable string for URL representation.""" user = _human_quote(self.user, "#/:?@")