Add URL.joinpath(*elements, encoded=False) to build a URL with multi…

…ple new path elements. This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`.
aio-libs · Aug 1, 2022 · 2edbe41 · 2edbe41
1 parent dcced6d
commit 2edbe41
Show file tree

Hide file tree

Showing 5 changed files with 162 additions and 21 deletions.
diff --git a/CHANGES/704.feature.rst b/CHANGES/704.feature.rst
@@ -0,0 +1 @@
+Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.
diff --git a/docs/api.rst b/docs/api.rst
@@ -817,6 +817,30 @@ The path is encoded if needed.
       >>> url
       URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
 
+.. method:: URL.joinpath(*other, encoded=False)
+
+   Construct a new URL with all ``other`` elements appended to
+   *path*, and cleaned up *query* and *fragment* parts.
+
+   Passing ``encoded=True`` prevents path element auto-encoding; the user is then
+   responsible for taking care of URL correctness.
+
+   .. doctest::
+
+      >>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
+      >>> url
+      URL('http://example.com/path/to/subpath')
+      >>> url.parts
+      ('/', 'path', 'to', 'subpath')
+      >>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
+      >>> url
+      URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
+      >>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
+      >>> url
+      URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
+
+   .. versionadded:: 1.9
+
 .. method:: URL.join(url)
 
    Construct a full (“absolute”) URL by combining a “base URL”

diff --git a/tests/test_url.py b/tests/test_url.py
@@ -767,6 +767,101 @@ def test_div_with_dots():
     assert url.raw_path == "/path/to"
 
 
+# joinpath
+
+
+def test_joinpath_root():
+    url = URL("http://example.com")
+    assert str(url.joinpath("path", "to")) == "http://example.com/path/to"
+
+
+def test_joinpath_root_with_slash():
+    url = URL("http://example.com/")
+    assert str(url.joinpath("path", "to")) == "http://example.com/path/to"
+
+
+def test_joinpath():
+    url = URL("http://example.com/path")
+    assert str(url.joinpath("to")) == "http://example.com/path/to"
+
+
+def test_joinpath_with_slash():
+    url = URL("http://example.com/path/")
+    assert str(url.joinpath("to")) == "http://example.com/path/to"
+
+
+def test_joinpath_path_starting_from_slash_is_forbidden():
+    url = URL("http://example.com/path/")
+    with pytest.raises(ValueError):
+        assert url.joinpath("/to/others")
+
+
+def test_joinpath_cleanup_query_and_fragment():
+    url = URL("http://example.com/path?a=1#frag")
+    assert str(url.joinpath("to")) == "http://example.com/path/to"
+
+
+def test_joinpath_for_empty_url():
+    url = URL().joinpath("a")
+    assert url.raw_parts == ("a",)
+
+
+def test_joinpath_for_relative_url():
+    url = URL("a").joinpath("b")
+    assert url.raw_parts == ("a", "b")
+
+
+def test_joinpath_with_empty_element():
+    url = URL("a").joinpath("b", "", "c")
+    assert url.raw_parts == ("a", "b", "c")
+
+
+def test_joinpath_for_relative_url_started_with_slash():
+    url = URL("/a").joinpath("b")
+    assert url.raw_parts == ("/", "a", "b")
+
+
+def test_joinpath_non_ascii():
+    url = URL("http://example.com/сюда")
+    url2 = url.joinpath("туда")
+    assert url2.path == "/сюда/туда"
+    assert url2.raw_path == "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0"
+    assert url2.parts == ("/", "сюда", "туда")
+    assert url2.raw_parts == (
+        "/",
+        "%D1%81%D1%8E%D0%B4%D0%B0",
+        "%D1%82%D1%83%D0%B4%D0%B0",
+    )
+
+
+def test_joinpath_percent_encoded():
+    url = URL("http://example.com/path")
+    url2 = url.joinpath("%cf%80")
+    assert url2.path == "/path/%cf%80"
+    assert url2.raw_path == "/path/%25cf%2580"
+    assert url2.parts == ("/", "path", "%cf%80")
+    assert url2.raw_parts == ("/", "path", "%25cf%2580")
+
+
+def test_joinpath_encoded_percent_encoded():
+    url = URL("http://example.com/path")
+    url2 = url.joinpath("%cf%80", encoded=True)
+    assert url2.path == "/path/π"
+    assert url2.raw_path == "/path/%cf%80"
+    assert url2.parts == ("/", "path", "π")
+    assert url2.raw_parts == ("/", "path", "%cf%80")
+
+
+def test_joinpath_with_colon_and_at():
+    url = URL("http://example.com/base").joinpath("path:abc@123")
+    assert url.raw_path == "/base/path:abc@123"
+
+
+def test_joinpath_with_dots():
+    url = URL("http://example.com/base").joinpath("..", "path", ".", "to")
+    assert url.raw_path == "/path/to"
+
+
 # with_path
 
 

diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi
@@ -96,6 +96,7 @@ class URL:
     def with_name(self, name: str) -> URL: ...
     def with_suffix(self, suffix: str) -> URL: ...
     def join(self, url: URL) -> URL: ...
+    def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
     def human_repr(self) -> str: ...
     # private API
     @classmethod

diff --git a/yarl/_url.py b/yarl/_url.py
@@ -315,25 +315,7 @@ def __gt__(self, other):
         return self._val > other._val
 
     def __truediv__(self, name):
-        name = self._PATH_QUOTER(name)
-        if name.startswith("/"):
-            raise ValueError(
-                f"Appending path {name!r} starting from slash is forbidden"
-            )
-        path = self._val.path
-        if path == "/":
-            new_path = "/" + name
-        elif not path and not self.is_absolute():
-            new_path = name
-        else:
-            parts = path.rstrip("/").split("/")
-            parts.append(name)
-            new_path = "/".join(parts)
-        if self.is_absolute():
-            new_path = self._normalize_path(new_path)
-        return URL(
-            self._val._replace(path=new_path, query="", fragment=""), encoded=True
-        )
+        return self._make_child((name,))
 
     def __mod__(self, query):
         return self.update_query(query)
@@ -701,11 +683,45 @@ def _validate_authority_uri_abs_path(host, path):
                 "Path in a URL with authority should start with a slash ('/') if set"
             )
 
+    def _make_child(self, segments, encoded=False):
+        # add segments to self._val.path, accounting for absolute vs relative paths
+        parsed = []
+        for seg in reversed(segments):
+            if not seg:
+                continue
+            if seg[0] == "/":
+                raise ValueError(
+                    f"Appending path {seg!r} starting from slash is forbidden"
+                )
+            seg = seg if encoded else self._PATH_QUOTER(seg)
+            if "/" in seg:
+                parsed += (
+                    sub for sub in reversed(seg.split("/")) if sub and sub != "."
+                )
+            elif seg != ".":
+                parsed.append(seg)
+        parsed.reverse()
+        old_path = self._val.path
+        if old_path:
+            parsed = [*old_path.rstrip("/").split("/"), *parsed]
+        if self.is_absolute():
+            parsed = self._normalize_path_segments(parsed)
+        new_path = "/".join(parsed)
+        return URL(
+            self._val._replace(path=new_path, query="", fragment=""), encoded=True
+        )
+
     @classmethod
     def _normalize_path(cls, path):
-        # Drop '.' and '..' from path
+        # Drop '.' and '..' from str path
 
         segments = path.split("/")
+        return "/".join(cls._normalize_path_segments(segments))
+
+    @classmethod
+    def _normalize_path_segments(cls, segments):
+        # Drop '.' and '..' from a sequence of str segments
+
         resolved_path = []
 
         for seg in segments:
@@ -728,7 +744,7 @@ def _normalize_path(cls, path):
             # then we need to append the trailing '/'
             resolved_path.append("")
 
-        return "/".join(resolved_path)
+        return resolved_path
 
     @classmethod
     def _encode_host(cls, host, human=False):
@@ -1077,6 +1093,10 @@ def join(self, url):
             raise TypeError("url should be URL")
         return URL(urljoin(str(self), str(url)), encoded=True)
 
+    def joinpath(self, *other, encoded=False):
+        """Return a new URL with the elements in other appended to the path."""
+        return self._make_child(other, encoded=encoded)
+
     def human_repr(self):
         """Return decoded human readable string for URL representation."""
         user = _human_quote(self.user, "#/:?@")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.