Add URL.joinpath(*elements, encoded=False) to build a URL with multiple new path elements

This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`.
aio-libs · Dec 13, 2022 · 3e942f7 · 3e942f7
1 parent 15d4617
commit 3e942f7
Show file tree

Hide file tree

Showing 5 changed files with 190 additions and 39 deletions.
diff --git a/CHANGES/704.feature.rst b/CHANGES/704.feature.rst
@@ -0,0 +1 @@
+Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.
diff --git a/docs/api.rst b/docs/api.rst
@@ -817,6 +817,30 @@ The path is encoded if needed.
       >>> url
       URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
 
+.. method:: URL.joinpath(*other, encoded=False)
+
+   Construct a new URL with all ``other`` elements appended to
+   *path*, and with the *query* and *fragment* parts removed.
+
+   Passing ``encoded=True`` prevents path element auto-encoding; the caller is
+   then responsible for taking care of URL correctness.
+
+   .. doctest::
+
+      >>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
+      >>> url
+      URL('http://example.com/path/to/subpath')
+      >>> url.parts
+      ('/', 'path', 'to', 'subpath')
+      >>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
+      >>> url
+      URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
+      >>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
+      >>> url
+      URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
+
+   .. versionadded:: 1.9
+
 .. method:: URL.join(url)
 
    Construct a full (“absolute”) URL by combining a “base URL”

diff --git a/tests/test_url.py b/tests/test_url.py
@@ -767,6 +767,111 @@ def test_div_with_dots():
     assert url.raw_path == "/path/to"
 
 
+# joinpath
+
+
+@pytest.mark.parametrize(
+    "base,to_join,expected",
+    [
+        pytest.param("", ("path", "to"), "http://example.com/path/to", id="root"),
+        pytest.param(
+            "/", ("path", "to"), "http://example.com/path/to", id="root-with-slash"
+        ),
+        pytest.param("/path", ("to",), "http://example.com/path/to", id="path"),
+        pytest.param(
+            "/path/", ("to",), "http://example.com/path/to", id="path-with-slash"
+        ),
+        pytest.param(
+            "/path?a=1#frag",
+            ("to",),
+            "http://example.com/path/to",
+            id="cleanup-query-and-fragment",
+        ),
+    ],
+)
+def test_joinpath(base, to_join, expected):
+    """Check the string form of ``http://example.com{base}`` after joinpath()."""
+    # *base* is appended verbatim to the host, so it may carry a path,
+    # a query string and a fragment.
+    url = URL(f"http://example.com{base}")
+    assert str(url.joinpath(*to_join)) == expected
+
+
+@pytest.mark.parametrize(
+    "url,to_join,expected",
+    [
+        pytest.param(URL(), ("a",), ("a",), id="empty-url"),
+        pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"),
+        pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"),
+        # NOTE: the trailing comma matters; ("b") is just the string "b" and
+        # would be unpacked character by character by *to_join.
+        pytest.param(URL("/a"), ("b",), ("/", "a", "b"), id="absolute-path"),
+    ],
+)
+def test_joinpath_relative(url, to_join, expected):
+    """Check ``raw_parts`` after joinpath() on URLs that have no host."""
+    assert url.joinpath(*to_join).raw_parts == expected
+
+
+@pytest.mark.parametrize(
+    "url,to_join,encoded,e_path,e_raw_path,e_parts,e_raw_parts",
+    [
+        pytest.param(
+            "http://example.com/сюда",
+            ("туда",),
+            False,
+            "/сюда/туда",
+            "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0",
+            ("/", "сюда", "туда"),
+            ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"),
+            id="non-ascii",
+        ),
+        pytest.param(
+            "http://example.com/path",
+            ("%cf%80",),
+            False,
+            "/path/%cf%80",
+            "/path/%25cf%2580",
+            ("/", "path", "%cf%80"),
+            ("/", "path", "%25cf%2580"),
+            id="percent-encoded",
+        ),
+        pytest.param(
+            "http://example.com/path",
+            ("%cf%80",),
+            True,
+            "/path/π",
+            "/path/%cf%80",
+            ("/", "path", "π"),
+            ("/", "path", "%cf%80"),
+            id="encoded-percent-encoded",
+        ),
+    ],
+)
+def test_joinpath_encoding(
+    url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts
+):
+    """Check decoded and raw path/parts of a joined URL.
+
+    Each case passes the ``encoded`` flag through to joinpath() and compares
+    ``path``, ``raw_path``, ``parts`` and ``raw_parts`` of the result.
+    """
+    joined = URL(url).joinpath(*to_join, encoded=encoded)
+    assert joined.path == e_path
+    assert joined.raw_path == e_raw_path
+    assert joined.parts == e_parts
+    assert joined.raw_parts == e_raw_parts
+
+
+@pytest.mark.parametrize(
+    "to_join,expected",
+    [
+        pytest.param(("path:abc@123",), "/base/path:abc@123", id="with-colon-and-at"),
+        pytest.param(("..", "path", ".", "to"), "/path/to", id="with-dots"),
+    ],
+)
+def test_joinpath_edgecases(to_join, expected):
+    """Check ``raw_path`` when joining special segments onto ``/base``."""
+    url = URL("http://example.com/base").joinpath(*to_join)
+    assert url.raw_path == expected
+
+
+def test_joinpath_path_starting_from_slash_is_forbidden():
+    """joinpath() must reject an element that starts with a slash."""
+    url = URL("http://example.com/path/")
+    with pytest.raises(
+        ValueError, match="Appending path .* starting from slash is forbidden"
+    ):
+        # No assert here: the call itself is expected to raise, and
+        # pytest.raises already fails the test if it does not.
+        url.joinpath("/to/others")
+
+
 # with_path
 
 

diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi
@@ -96,6 +96,7 @@ class URL:
     def with_name(self, name: str) -> URL: ...
     def with_suffix(self, suffix: str) -> URL: ...
     def join(self, url: URL) -> URL: ...
+    def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
     def human_repr(self) -> str: ...
     # private API
     @classmethod

diff --git a/yarl/_url.py b/yarl/_url.py
@@ -52,6 +52,30 @@ def __set__(self, inst, value):
         raise AttributeError("cached property is read-only")
 
 
+def _normalize_path_segments(segments):
+    """Resolve '.' and '..' entries in a sequence of str path segments.
+
+    Returns a new list; '.' entries are removed and each '..' removes the
+    entry kept before it, if there is one.
+    """
+
+    kept = []
+
+    for part in segments:
+        if part == ".":
+            continue
+        if part != "..":
+            kept.append(part)
+        elif kept:
+            # a '..' with nothing left to remove is silently ignored,
+            # as when resolving for rfc3986
+            kept.pop()
+
+    if segments and segments[-1] in (".", ".."):
+        # the input ended with a relative dir reference, so the result
+        # needs an empty last segment to produce the trailing '/'
+        kept.append("")
+
+    return kept
+
+
 @rewrite_module
 class URL:
     # Don't derive from str
@@ -316,25 +340,7 @@ def __gt__(self, other):
         return self._val > other._val
 
     def __truediv__(self, name):
-        name = self._PATH_QUOTER(name)
-        if name.startswith("/"):
-            raise ValueError(
-                f"Appending path {name!r} starting from slash is forbidden"
-            )
-        path = self._val.path
-        if path == "/":
-            new_path = "/" + name
-        elif not path and not self.is_absolute():
-            new_path = name
-        else:
-            parts = path.rstrip("/").split("/")
-            parts.append(name)
-            new_path = "/".join(parts)
-        if self.is_absolute():
-            new_path = self._normalize_path(new_path)
-        return URL(
-            self._val._replace(path=new_path, query="", fragment=""), encoded=True
-        )
+        return self._make_child((name,))
 
     def __mod__(self, query):
         return self.update_query(query)
@@ -702,9 +708,37 @@ def _validate_authority_uri_abs_path(host, path):
                 "Path in a URL with authority should start with a slash ('/') if set"
             )
 
+    def _make_child(self, segments, encoded=False):
+        """Return a new URL with *segments* appended to ``self._val.path``.
+
+        Empty segments are skipped. A segment containing ``/`` is split into
+        sub-segments, dropping empty and ``.`` sub-segments. Unless *encoded*
+        is true, each segment is passed through ``self._PATH_QUOTER`` first.
+
+        For absolute URLs the combined path is normalized (``.`` and ``..``
+        are resolved) and is guaranteed to start with ``/``. The query and
+        fragment of the returned URL are cleared.
+
+        Raises ValueError if a segment starts with ``/``.
+        """
+        # Segments (and their sub-segments) are collected back-to-front and
+        # reversed once at the end, so the final order matches the input.
+        parsed = []
+        for seg in reversed(segments):
+            if not seg:
+                continue
+            if seg[0] == "/":
+                raise ValueError(
+                    f"Appending path {seg!r} starting from slash is forbidden"
+                )
+            seg = seg if encoded else self._PATH_QUOTER(seg)
+            if "/" in seg:
+                parsed += (
+                    sub for sub in reversed(seg.split("/")) if sub and sub != "."
+                )
+            elif seg != ".":
+                parsed.append(seg)
+        parsed.reverse()
+        old_path = self._val.path
+        if old_path:
+            # rstrip avoids an empty segment (double slash) at the join point
+            parsed = [*old_path.rstrip("/").split("/"), *parsed]
+        if self.is_absolute():
+            parsed = _normalize_path_segments(parsed)
+            if parsed and parsed[0] != "":
+                # The stored path was empty, or '..' consumed the root
+                # segment: re-insert the empty first segment so the joined
+                # path keeps its leading slash.
+                parsed = ["", *parsed]
+        new_path = "/".join(parsed)
+        return URL(
+            self._val._replace(path=new_path, query="", fragment=""), encoded=True
+        )
+
     @classmethod
     def _normalize_path(cls, path):
-        # Drop '.' and '..' from path
+        # Drop '.' and '..' from str path
 
         prefix = ""
         if path.startswith("/"):
@@ -714,25 +748,7 @@ def _normalize_path(cls, path):
             path = path[1:]
 
         segments = path.split("/")
-        resolved_path = []
-
-        for seg in segments:
-            if seg == "..":
-                # ignore any .. segments that would otherwise cause an
-                # IndexError when popped from resolved_path if
-                # resolving for rfc3986
-                with suppress(IndexError):
-                    resolved_path.pop()
-            elif seg != ".":
-                resolved_path.append(seg)
-
-        if segments and segments[-1] in (".", ".."):
-            # do some post-processing here.
-            # if the last segment was a relative dir,
-            # then we need to append the trailing '/'
-            resolved_path.append("")
-
-        return prefix + "/".join(resolved_path)
+        return prefix + "/".join(_normalize_path_segments(segments))
 
     @classmethod
     def _encode_host(cls, host, human=False):
@@ -1086,6 +1102,10 @@ def join(self, url):
             raise TypeError("url should be URL")
         return URL(urljoin(str(self), str(url)), encoded=True)
 
+    def joinpath(self, *other, encoded=False):
+        """Return a new URL with the elements in other appended to the path.
+
+        Delegates to ``_make_child``, forwarding *encoded* unchanged; that
+        helper raises ValueError for an element starting with a slash and
+        clears the query and fragment of the result.
+        """
+        return self._make_child(other, encoded=encoded)
+
     def human_repr(self):
         """Return decoded human readable string for URL representation."""
         user = _human_quote(self.user, "#/:?@")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.