Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add URL.joinpath(*elements, encoded=False) to build a URL with multiple new path elements #704

Merged
merged 1 commit into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/704.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements.
24 changes: 24 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,30 @@ The path is encoded if needed.
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. method:: URL.joinpath(*other, encoded=False)

Construct a new URL with all ``other`` elements appended to
*path*, and with the *query* and *fragment* parts removed.

Passing ``encoded=True`` disables auto-encoding of the path elements; the caller is then
responsible for ensuring the URL is correct.

.. doctest::

>>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
>>> url
URL('http://example.com/path/to/subpath')
>>> url.parts
('/', 'path', 'to', 'subpath')
>>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
>>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
>>> url
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')

.. versionadded:: 1.9

.. method:: URL.join(url)

Construct a full (“absolute”) URL by combining a “base URL”
Expand Down
105 changes: 105 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,111 @@ def test_div_with_dots():
assert url.raw_path == "/path/to"


# joinpath


@pytest.mark.parametrize(
    "base,to_join,expected",
    [
        pytest.param(
            "",
            ("path", "to"),
            "http://example.com/path/to",
            id="root",
        ),
        pytest.param(
            "/",
            ("path", "to"),
            "http://example.com/path/to",
            id="root-with-slash",
        ),
        pytest.param(
            "/path",
            ("to",),
            "http://example.com/path/to",
            id="path",
        ),
        pytest.param(
            "/path/",
            ("to",),
            "http://example.com/path/to",
            id="path-with-slash",
        ),
        pytest.param(
            "/path?a=1#frag",
            ("to",),
            "http://example.com/path/to",
            id="cleanup-query-and-fragment",
        ),
    ],
)
def test_joinpath(base, to_join, expected):
    """Joining elements onto an absolute URL yields the expected string.

    Covers an empty base path, trailing slashes on the base, and a base
    carrying a query and fragment (which must not survive the join).
    """
    joined = URL(f"http://example.com{base}").joinpath(*to_join)
    assert str(joined) == expected


@pytest.mark.parametrize(
    "url,to_join,expected",
    [
        pytest.param(URL(), ("a",), ("a",), id="empty-url"),
        pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"),
        pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"),
        # ``to_join`` must be a tuple: a bare ("b") is just the string "b" and
        # would only work by accident because it unpacks to one character.
        pytest.param(URL("/a"), ("b",), ("/", "a", "b"), id="absolute-path"),
    ],
)
def test_joinpath_relative(url, to_join, expected):
    """Check the raw path parts produced by joining onto URLs without a host."""
    assert url.joinpath(*to_join).raw_parts == expected


@pytest.mark.parametrize(
    "url,to_join,encoded,e_path,e_raw_path,e_parts,e_raw_parts",
    [
        # Non-ASCII input is percent-encoded in the raw views.
        pytest.param(
            "http://example.com/сюда",
            ("туда",),
            False,
            "/сюда/туда",
            "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0",
            ("/", "сюда", "туда"),
            ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"),
            id="non-ascii",
        ),
        # With encoded=False a literal '%' is itself escaped.
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            False,
            "/path/%cf%80",
            "/path/%25cf%2580",
            ("/", "path", "%cf%80"),
            ("/", "path", "%25cf%2580"),
            id="percent-encoded",
        ),
        # With encoded=True the element is taken as already encoded.
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            True,
            "/path/π",
            "/path/%cf%80",
            ("/", "path", "π"),
            ("/", "path", "%cf%80"),
            id="encoded-percent-encoded",
        ),
    ],
)
def test_joinpath_encoding(
    url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts
):
    """Verify decoded and raw path views after joining with/without ``encoded``."""
    result = URL(url).joinpath(*to_join, encoded=encoded)
    assert result.path == e_path
    assert result.raw_path == e_raw_path
    assert result.parts == e_parts
    assert result.raw_parts == e_raw_parts


@pytest.mark.parametrize(
    "to_join,expected",
    [
        pytest.param(
            ("path:abc@123",),
            "/base/path:abc@123",
            id="with-colon-and-at",
        ),
        pytest.param(
            ("..", "path", ".", "to"),
            "/path/to",
            id="with-dots",
        ),
    ],
)
def test_joinpath_edgecases(to_join, expected):
    """Join unusual elements (':' and '@', dot segments) onto ``/base``."""
    base = URL("http://example.com/base")
    joined = base.joinpath(*to_join)
    assert joined.raw_path == expected


def test_joinpath_path_starting_from_slash_is_forbidden():
    """An element that begins with '/' must be rejected with ValueError."""
    url = URL("http://example.com/path/")
    with pytest.raises(
        ValueError, match="Appending path .* starting from slash is forbidden"
    ):
        # Call directly instead of wrapping in ``assert``: the assertion can
        # never be evaluated once the call raises, and ``assert`` statements
        # are stripped under ``python -O``, which would skip the call itself.
        url.joinpath("/to/others")


# with_path


Expand Down
1 change: 1 addition & 0 deletions yarl/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class URL:
def with_name(self, name: str) -> URL: ...
def with_suffix(self, suffix: str) -> URL: ...
def join(self, url: URL) -> URL: ...
# ``encoded`` is keyword-only, mirroring the runtime signature
# ``joinpath(self, *other, encoded=False)`` so type checkers accept it.
def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
def human_repr(self) -> str: ...
# private API
@classmethod
Expand Down
98 changes: 59 additions & 39 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,30 @@ def __set__(self, inst, value):
raise AttributeError("cached property is read-only")


def _normalize_path_segments(segments):
    """Resolve '.' and '..' entries in a sequence of str path segments.

    '.' entries are dropped and each '..' removes the most recently kept
    segment; a '..' with nothing left to remove is ignored.  When the last
    input segment is '.' or '..', an empty string is appended so that the
    joined result ends with a trailing '/'.

    Returns a new list; *segments* must support truthiness and ``[-1]``.
    """
    kept = []

    for segment in segments:
        if segment == "..":
            # Nothing to pop means the '..' would climb above the start;
            # it is silently ignored.
            if kept:
                kept.pop()
            continue
        if segment == ".":
            continue
        kept.append(segment)

    ends_with_dot_dir = bool(segments) and segments[-1] in (".", "..")
    if ends_with_dot_dir:
        # A trailing relative directory still denotes a directory, so keep
        # the trailing slash by adding an empty final segment.
        kept.append("")

    return kept


@rewrite_module
class URL:
# Don't derive from str
Expand Down Expand Up @@ -316,25 +340,7 @@ def __gt__(self, other):
return self._val > other._val

def __truediv__(self, name):
    """Implement ``url / name``: append a single element to the path.

    All path handling is delegated to ``_make_child`` so that the ``/``
    operator and ``joinpath`` share one implementation; the previous inline
    body duplicated that logic and left the delegating return unreachable.

    Returns ``NotImplemented`` for a non-str operand, so Python raises
    ``TypeError`` (or tries the operand's reflected method).
    """
    if not isinstance(name, str):
        return NotImplemented
    return self._make_child((name,))

def __mod__(self, query):
    """Implement ``url % query`` by delegating to ``update_query``."""
    updated = self.update_query(query)
    return updated
Expand Down Expand Up @@ -702,9 +708,37 @@ def _validate_authority_uri_abs_path(host, path):
"Path in a URL with authority should start with a slash ('/') if set"
)

def _make_child(self, segments, encoded=False):
    """Return a new URL with *segments* appended to the current path.

    Empty segments are skipped.  Each remaining segment is passed through
    ``self._PATH_QUOTER`` unless *encoded* is true, then split on '/';
    empty pieces and '.' pieces are discarded.  For absolute URLs the
    combined path also has its '..' entries resolved.  The query and
    fragment of the new URL are always empty.

    Raises ValueError if any non-empty segment starts with '/'.
    """
    # Segments are walked back-to-front and collected in reverse order;
    # the list is flipped once after the loop.
    parsed = []
    for seg in reversed(segments):
        if not seg:
            continue
        if seg[0] == "/":
            raise ValueError(
                f"Appending path {seg!r} starting from slash is forbidden"
            )
        if not encoded:
            seg = self._PATH_QUOTER(seg)
        if "/" in seg:
            parsed += (
                sub for sub in reversed(seg.split("/")) if sub and sub != "."
            )
        elif seg != ".":
            parsed.append(seg)
    parsed.reverse()
    old_path = self._val.path
    if old_path:
        # Drop the trailing slash first so no empty segment ends up between
        # the old path and the appended elements.
        parsed = [*old_path.rstrip("/").split("/"), *parsed]
    if self.is_absolute():
        parsed = _normalize_path_segments(parsed)
        if parsed and parsed[0] != "":
            # The leading empty segment stands for the root '/'.  It is
            # missing when the stored path was empty, or when a '..' popped
            # it during normalization; restore it so the path of an
            # absolute URL always starts with a slash.
            parsed = ["", *parsed]
    new_path = "/".join(parsed)
    return URL(
        self._val._replace(path=new_path, query="", fragment=""), encoded=True
    )

@classmethod
def _normalize_path(cls, path):
# Drop '.' and '..' from path
# Drop '.' and '..' from str path

prefix = ""
if path.startswith("/"):
Expand All @@ -714,25 +748,7 @@ def _normalize_path(cls, path):
path = path[1:]

segments = path.split("/")
resolved_path = []

for seg in segments:
if seg == "..":
# ignore any .. segments that would otherwise cause an
# IndexError when popped from resolved_path if
# resolving for rfc3986
with suppress(IndexError):
resolved_path.pop()
elif seg != ".":
resolved_path.append(seg)

if segments and segments[-1] in (".", ".."):
# do some post-processing here.
# if the last segment was a relative dir,
# then we need to append the trailing '/'
resolved_path.append("")

return prefix + "/".join(resolved_path)
return prefix + "/".join(_normalize_path_segments(segments))

@classmethod
def _encode_host(cls, host, human=False):
Expand Down Expand Up @@ -1086,6 +1102,10 @@ def join(self, url):
raise TypeError("url should be URL")
return URL(urljoin(str(self), str(url)), encoded=True)

def joinpath(self, *other, encoded=False):
    """Return a new URL with the elements in *other* appended to the path.

    The work is delegated to ``_make_child``; *encoded* is forwarded
    unchanged and, when true, the elements are used as given instead of
    being quoted.
    """
    child = self._make_child(other, encoded=encoded)
    return child

def human_repr(self):
"""Return decoded human readable string for URL representation."""
user = _human_quote(self.user, "#/:?@")
Expand Down