Skip to content

Commit

Permalink
Add URL.joinpath(*elements, encoded=False) to build a URL with multi…
Browse files Browse the repository at this point in the history
…ple new path elements

This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`.
  • Loading branch information
mjpieters committed Dec 13, 2022
1 parent 15d4617 commit 3e942f7
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 39 deletions.
1 change: 1 addition & 0 deletions CHANGES/704.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.
24 changes: 24 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,30 @@ The path is encoded if needed.
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. method:: URL.joinpath(*other, encoded=False)

Construct a new URL with all ``other`` elements appended to
*path*, and with the *query* and *fragment* parts removed.

Passing ``encoded=True`` prevents auto-encoding of the path elements; the caller is
then responsible for taking care of URL correctness.

.. doctest::

>>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
>>> url
URL('http://example.com/path/to/subpath')
>>> url.parts
('/', 'path', 'to', 'subpath')
>>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
>>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. versionadded:: 1.9

.. method:: URL.join(url)

Construct a full (“absolute”) URL by combining a “base URL”
Expand Down
105 changes: 105 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,111 @@ def test_div_with_dots():
assert url.raw_path == "/path/to"


# joinpath


@pytest.mark.parametrize(
    ("base", "to_join", "expected"),
    [
        pytest.param("", ("path", "to"), "http://example.com/path/to", id="root"),
        pytest.param(
            "/", ("path", "to"), "http://example.com/path/to", id="root-with-slash"
        ),
        pytest.param("/path", ("to",), "http://example.com/path/to", id="path"),
        pytest.param(
            "/path/", ("to",), "http://example.com/path/to", id="path-with-slash"
        ),
        pytest.param(
            "/path?a=1#frag",
            ("to",),
            "http://example.com/path/to",
            id="cleanup-query-and-fragment",
        ),
    ],
)
def test_joinpath(base, to_join, expected):
    """Elements are appended to the base path; query and fragment are dropped."""
    joined = URL(f"http://example.com{base}").joinpath(*to_join)
    assert str(joined) == expected


@pytest.mark.parametrize(
    "url,to_join,expected",
    [
        pytest.param(URL(), ("a",), ("a",), id="empty-url"),
        pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"),
        pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"),
        # to_join must be a tuple: a bare ("b") is just the string "b" and would
        # be unpacked character by character.
        pytest.param(URL("/a"), ("b",), ("/", "a", "b"), id="absolute-path"),
    ],
)
def test_joinpath_relative(url, to_join, expected):
    """Check raw_parts after joining onto URLs that have no scheme or host."""
    assert url.joinpath(*to_join).raw_parts == expected


@pytest.mark.parametrize(
    ("url", "to_join", "encoded", "e_path", "e_raw_path", "e_parts", "e_raw_parts"),
    [
        pytest.param(
            "http://example.com/сюда",
            ("туда",),
            False,
            "/сюда/туда",
            "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0",
            ("/", "сюда", "туда"),
            ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"),
            id="non-ascii",
        ),
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            False,
            "/path/%cf%80",
            "/path/%25cf%2580",
            ("/", "path", "%cf%80"),
            ("/", "path", "%25cf%2580"),
            id="percent-encoded",
        ),
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            True,
            "/path/π",
            "/path/%cf%80",
            ("/", "path", "π"),
            ("/", "path", "%cf%80"),
            id="encoded-percent-encoded",
        ),
    ],
)
def test_joinpath_encoding(
    url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts
):
    """Check decoded and raw path views of the result for both ``encoded`` modes."""
    joined = URL(url).joinpath(*to_join, encoded=encoded)
    observed = (joined.path, joined.raw_path, joined.parts, joined.raw_parts)
    assert observed == (e_path, e_raw_path, e_parts, e_raw_parts)


@pytest.mark.parametrize(
    ("to_join", "expected"),
    [
        pytest.param(("path:abc@123",), "/base/path:abc@123", id="with-colon-and-at"),
        pytest.param(("..", "path", ".", "to"), "/path/to", id="with-dots"),
    ],
)
def test_joinpath_edgecases(to_join, expected):
    """Cover elements containing ':' and '@', and '.' / '..' elements."""
    base = URL("http://example.com/base")
    assert base.joinpath(*to_join).raw_path == expected


def test_joinpath_path_starting_from_slash_is_forbidden():
    """An element with a leading slash must be rejected with ValueError."""
    base = URL("http://example.com/path/")
    expected_message = "Appending path .* starting from slash is forbidden"
    with pytest.raises(ValueError, match=expected_message):
        base.joinpath("/to/others")


# with_path


Expand Down
1 change: 1 addition & 0 deletions yarl/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class URL:
def with_name(self, name: str) -> URL: ...
def with_suffix(self, suffix: str) -> URL: ...
def join(self, url: URL) -> URL: ...
# ``encoded`` is keyword-only, matching the runtime ``joinpath(*other, encoded=False)``.
def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
def human_repr(self) -> str: ...
# private API
@classmethod
Expand Down
98 changes: 59 additions & 39 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,30 @@ def __set__(self, inst, value):
raise AttributeError("cached property is read-only")


def _normalize_path_segments(segments):
    """Resolve '.' and '..' entries in a sequence of str path segments.

    '.' entries are removed and each '..' removes the segment before it.
    A '..' with nothing left to remove is ignored.  When the input ends
    with '.' or '..' an empty segment is appended so that the joined path
    keeps its trailing '/'.

    Returns a new list; *segments* is not modified.
    """
    stack = []

    for part in segments:
        if part == ".":
            continue
        if part != "..":
            stack.append(part)
        elif stack:
            # only step up when there is a segment left to drop; a '..'
            # above the top is ignored (rfc3986 dot-segment removal)
            stack.pop()

    # the last segment was a relative dir, so the result must end in '/'
    if segments and segments[-1] in (".", ".."):
        stack.append("")

    return stack


@rewrite_module
class URL:
# Don't derive from str
Expand Down Expand Up @@ -316,25 +340,7 @@ def __gt__(self, other):
return self._val > other._val

def __truediv__(self, name):
    """Return a new URL with *name* appended to the path (``url / name``).

    The work is delegated to ``_make_child``, which raises ValueError when
    *name* starts with a slash and returns a URL without query and fragment.
    """
    if not isinstance(name, str):
        # Let Python raise TypeError for unsupported operands; _make_child
        # would silently skip falsy non-str values such as 0 or None.
        return NotImplemented
    return self._make_child((name,))

def __mod__(self, query):
return self.update_query(query)
Expand Down Expand Up @@ -702,9 +708,37 @@ def _validate_authority_uri_abs_path(host, path):
"Path in a URL with authority should start with a slash ('/') if set"
)

def _make_child(self, segments, encoded=False):
    """Return a new URL with *segments* appended to the current path.

    Empty segments and ``.`` are dropped, and a segment containing ``/``
    is split into its components.  Unless *encoded* is true, every segment
    is passed through ``self._PATH_QUOTER`` first.  For absolute URLs the
    ``..`` components are resolved and the resulting path is kept rooted
    at ``/``.  The query and fragment of the original URL are cleared.

    Raises ValueError if a segment starts with ``/``.
    """
    parsed = []
    for seg in segments:
        if not seg:
            continue
        if seg[0] == "/":
            raise ValueError(
                f"Appending path {seg!r} starting from slash is forbidden"
            )
        if not encoded:
            seg = self._PATH_QUOTER(seg)
        # one element may carry several components ("a/b"); empty
        # components and "." never contribute to the path
        parsed.extend(sub for sub in seg.split("/") if sub and sub != ".")
    old_path = self._val.path
    if old_path:
        parsed = [*old_path.rstrip("/").split("/"), *parsed]
    if self.is_absolute():
        parsed = _normalize_path_segments(parsed)
        if parsed and parsed[0] != "":
            # The stored path was empty, or ".." removed the root marker:
            # re-root the result so the path of a URL with an authority
            # still starts with "/".
            parsed = ["", *parsed]
    new_path = "/".join(parsed)
    return URL(
        self._val._replace(path=new_path, query="", fragment=""), encoded=True
    )

@classmethod
def _normalize_path(cls, path):
# Drop '.' and '..' from path
# Drop '.' and '..' from str path

prefix = ""
if path.startswith("/"):
Expand All @@ -714,25 +748,7 @@ def _normalize_path(cls, path):
path = path[1:]

segments = path.split("/")
resolved_path = []

for seg in segments:
if seg == "..":
# ignore any .. segments that would otherwise cause an
# IndexError when popped from resolved_path if
# resolving for rfc3986
with suppress(IndexError):
resolved_path.pop()
elif seg != ".":
resolved_path.append(seg)

if segments and segments[-1] in (".", ".."):
# do some post-processing here.
# if the last segment was a relative dir,
# then we need to append the trailing '/'
resolved_path.append("")

return prefix + "/".join(resolved_path)
return prefix + "/".join(_normalize_path_segments(segments))

@classmethod
def _encode_host(cls, host, human=False):
Expand Down Expand Up @@ -1086,6 +1102,10 @@ def join(self, url):
raise TypeError("url should be URL")
return URL(urljoin(str(self), str(url)), encoded=True)

def joinpath(self, *other, encoded=False):
    """Build a new URL by appending every element of *other* to the path.

    Pass ``encoded=True`` when the elements are already percent-encoded.
    """
    child = self._make_child(other, encoded=encoded)
    return child

def human_repr(self):
"""Return decoded human readable string for URL representation."""
user = _human_quote(self.user, "#/:?@")
Expand Down

0 comments on commit 3e942f7

Please sign in to comment.