Skip to content

Commit

Permalink
Add URL.joinpath(*elements, encoded=False) to build a URL with multi…
Browse files Browse the repository at this point in the history
…ple new path elements

This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`.
  • Loading branch information
mjpieters authored and webknjaz committed Aug 1, 2022
1 parent dcced6d commit 2edbe41
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGES/704.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.
24 changes: 24 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,30 @@ The path is encoded if needed.
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. method:: URL.joinpath(*other, encoded=False)

Construct a new URL with all ``other`` elements appended to
*path*, and with the *query* and *fragment* parts cleaned up.

Passing ``encoded=True`` prevents path element auto-encoding; the user is then
responsible for taking care of URL correctness.

.. doctest::

>>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
>>> url
URL('http://example.com/path/to/subpath')
>>> url.parts
('/', 'path', 'to', 'subpath')
>>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
>>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. versionadded:: 1.9

.. method:: URL.join(url)

Construct a full (“absolute”) URL by combining a “base URL”
Expand Down
95 changes: 95 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,101 @@ def test_div_with_dots():
assert url.raw_path == "/path/to"


# joinpath


def test_joinpath_root():
    """Joining onto a host-only URL produces an absolute path."""
    base = URL("http://example.com")
    joined = base.joinpath("path", "to")
    assert str(joined) == "http://example.com/path/to"


def test_joinpath_root_with_slash():
    """A root path given as '/' does not yield a doubled slash."""
    base = URL("http://example.com/")
    joined = base.joinpath("path", "to")
    assert str(joined) == "http://example.com/path/to"


def test_joinpath():
    """A single element is appended after the existing path."""
    base = URL("http://example.com/path")
    joined = base.joinpath("to")
    assert str(joined) == "http://example.com/path/to"


def test_joinpath_with_slash():
    """A trailing slash on the base path is not duplicated."""
    base = URL("http://example.com/path/")
    joined = base.joinpath("to")
    assert str(joined) == "http://example.com/path/to"


def test_joinpath_path_starting_from_slash_is_forbidden():
    """An element that starts with a slash is rejected with ValueError."""
    url = URL("http://example.com/path/")
    with pytest.raises(ValueError):
        # No ``assert`` here: the call is expected to raise, so it never
        # produces a value that could be asserted on.
        url.joinpath("/to/others")


def test_joinpath_cleanup_query_and_fragment():
    """Query string and fragment of the base URL are dropped."""
    base = URL("http://example.com/path?a=1#frag")
    joined = base.joinpath("to")
    assert str(joined) == "http://example.com/path/to"


def test_joinpath_for_empty_url():
    """Joining onto an empty URL gives a path made of the new element only."""
    empty = URL()
    joined = empty.joinpath("a")
    assert joined.raw_parts == ("a",)


def test_joinpath_for_relative_url():
    """A relative base stays relative after joining."""
    relative = URL("a")
    joined = relative.joinpath("b")
    assert joined.raw_parts == ("a", "b")


def test_joinpath_with_empty_element():
    """Empty elements are skipped instead of creating empty path parts."""
    relative = URL("a")
    joined = relative.joinpath("b", "", "c")
    assert joined.raw_parts == ("a", "b", "c")


def test_joinpath_for_relative_url_started_with_slash():
    """A host-less URL whose path starts with '/' keeps its root part."""
    rooted = URL("/a")
    joined = rooted.joinpath("b")
    assert joined.raw_parts == ("/", "a", "b")


def test_joinpath_non_ascii():
    """Non-ASCII elements are percent-encoded in the raw views only."""
    joined = URL("http://example.com/сюда").joinpath("туда")
    expected_raw_parts = (
        "/",
        "%D1%81%D1%8E%D0%B4%D0%B0",
        "%D1%82%D1%83%D0%B4%D0%B0",
    )
    assert joined.path == "/сюда/туда"
    assert joined.raw_path == "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0"
    assert joined.parts == ("/", "сюда", "туда")
    assert joined.raw_parts == expected_raw_parts


def test_joinpath_percent_encoded():
    """Without ``encoded=True`` a literal '%' in an element is itself quoted."""
    base = URL("http://example.com/path")
    joined = base.joinpath("%cf%80")
    assert joined.path == "/path/%cf%80"
    assert joined.raw_path == "/path/%25cf%2580"
    assert joined.parts == ("/", "path", "%cf%80")
    assert joined.raw_parts == ("/", "path", "%25cf%2580")


def test_joinpath_encoded_percent_encoded():
    """With ``encoded=True`` the element is taken as already percent-encoded."""
    base = URL("http://example.com/path")
    joined = base.joinpath("%cf%80", encoded=True)
    assert joined.path == "/path/π"
    assert joined.raw_path == "/path/%cf%80"
    assert joined.parts == ("/", "path", "π")
    assert joined.raw_parts == ("/", "path", "%cf%80")


def test_joinpath_with_colon_and_at():
    """':' and '@' inside an element are left unquoted in the raw path."""
    base = URL("http://example.com/base")
    joined = base.joinpath("path:abc@123")
    assert joined.raw_path == "/base/path:abc@123"


def test_joinpath_with_dots():
    """'.' and '..' elements are resolved for an absolute URL."""
    base = URL("http://example.com/base")
    joined = base.joinpath("..", "path", ".", "to")
    assert joined.raw_path == "/path/to"


# with_path


Expand Down
1 change: 1 addition & 0 deletions yarl/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class URL:
def with_name(self, name: str) -> URL: ...
def with_suffix(self, suffix: str) -> URL: ...
def join(self, url: URL) -> URL: ...
def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
def human_repr(self) -> str: ...
# private API
@classmethod
Expand Down
62 changes: 41 additions & 21 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,25 +315,7 @@ def __gt__(self, other):
return self._val > other._val

def __truediv__(self, name):
    """Return a new URL with *name* appended to the path (``url / name``).

    The work is delegated to ``_make_child`` with a one-element tuple, so
    the ``/`` operator and ``joinpath()`` share one implementation instead
    of keeping a second, inline copy of the path-building logic.
    """
    return self._make_child((name,))

def __mod__(self, query):
    """Support ``url % query`` by delegating to ``update_query``."""
    updated = self.update_query(query)
    return updated
Expand Down Expand Up @@ -701,11 +683,45 @@ def _validate_authority_uri_abs_path(host, path):
"Path in a URL with authority should start with a slash ('/') if set"
)

def _make_child(self, segments, encoded=False):
    """Return a new URL with *segments* appended to the current path.

    Empty segments are skipped and a segment starting with ``/`` raises
    ValueError.  Unless *encoded* is true, each segment is passed through
    ``self._PATH_QUOTER`` first.  A segment containing ``/`` is split into
    its pieces; empty pieces and ``.`` are dropped.  The query and fragment
    of the result are reset to empty strings.
    """
    collected = []
    # Segments are visited back-to-front and their pieces stored reversed;
    # a single reverse() afterwards restores the caller's order.
    for segment in reversed(segments):
        if not segment:
            continue
        if segment[0] == "/":
            raise ValueError(
                f"Appending path {segment!r} starting from slash is forbidden"
            )
        if not encoded:
            segment = self._PATH_QUOTER(segment)
        if "/" not in segment:
            if segment != ".":
                collected.append(segment)
            continue
        collected.extend(
            piece
            for piece in reversed(segment.split("/"))
            if piece and piece != "."
        )
    collected.reverse()

    base_path = self._val.path
    if base_path:
        # Trailing slashes of the existing path are stripped before it is
        # split, so no empty part ends up between the old and new pieces.
        collected = base_path.rstrip("/").split("/") + collected
    if self.is_absolute():
        # Dot-segment normalization is applied to absolute URLs only.
        collected = self._normalize_path_segments(collected)

    replaced = self._val._replace(
        path="/".join(collected), query="", fragment=""
    )
    return URL(replaced, encoded=True)

@classmethod
def _normalize_path(cls, path):
    """Drop '.' and '..' from a str path.

    The path is split on ``/``, handed to ``_normalize_path_segments`` and
    the resulting segments are joined back with ``/``.
    """
    parts = cls._normalize_path_segments(path.split("/"))
    return "/".join(parts)

@classmethod
def _normalize_path_segments(cls, segments):
# Drop '.' and '..' from a sequence of str segments

resolved_path = []

for seg in segments:
Expand All @@ -728,7 +744,7 @@ def _normalize_path(cls, path):
# then we need to append the trailing '/'
resolved_path.append("")

return "/".join(resolved_path)
return resolved_path

@classmethod
def _encode_host(cls, host, human=False):
Expand Down Expand Up @@ -1077,6 +1093,10 @@ def join(self, url):
raise TypeError("url should be URL")
return URL(urljoin(str(self), str(url)), encoded=True)

def joinpath(self, *other, encoded=False):
    """Return a new URL with the elements in *other* appended to the path.

    The elements and the *encoded* flag are forwarded unchanged to
    ``_make_child``, which builds the new URL.
    """
    child = self._make_child(other, encoded=encoded)
    return child

def human_repr(self):
"""Return decoded human readable string for URL representation."""
user = _human_quote(self.user, "#/:?@")
Expand Down

0 comments on commit 2edbe41

Please sign in to comment.