Move the path-normalization helpers out of yarl/_url.py into a new module,
yarl/_path.py, and rename them without the leading underscore
(_normalize_path -> normalize_path,
_normalize_path_segments -> normalize_path_segments).
Call sites in yarl/_url.py and the import in tests/test_normalize_path.py
are updated; the helper bodies are moved unchanged.

NOTE(review): the line breaks and the leading indentation of context lines
were reconstructed from a whitespace-collapsed copy of this patch. Confirm
against the target tree, or apply with `git apply --ignore-whitespace`.

diff --git a/tests/test_normalize_path.py b/tests/test_normalize_path.py
index 48726481a..f10432ab4 100644
--- a/tests/test_normalize_path.py
+++ b/tests/test_normalize_path.py
@@ -1,6 +1,6 @@
 import pytest
 
-from yarl._url import _normalize_path
+from yarl._path import normalize_path
 
 PATHS = [
     # No dots
@@ -33,4 +33,4 @@
 
 @pytest.mark.parametrize("original,expected", PATHS)
 def test__normalize_path(original, expected):
-    assert _normalize_path(original) == expected
+    assert normalize_path(original) == expected
diff --git a/yarl/_path.py b/yarl/_path.py
new file mode 100644
index 000000000..c22f0b4b8
--- /dev/null
+++ b/yarl/_path.py
@@ -0,0 +1,41 @@
+"""Utilities for working with paths."""
+
+from collections.abc import Sequence
+from contextlib import suppress
+
+
+def normalize_path_segments(segments: Sequence[str]) -> list[str]:
+    """Drop '.' and '..' from a sequence of str segments"""
+
+    resolved_path: list[str] = []
+
+    for seg in segments:
+        if seg == "..":
+            # ignore any .. segments that would otherwise cause an
+            # IndexError when popped from resolved_path if
+            # resolving for rfc3986
+            with suppress(IndexError):
+                resolved_path.pop()
+        elif seg != ".":
+            resolved_path.append(seg)
+
+    if segments and segments[-1] in (".", ".."):
+        # do some post-processing here.
+        # if the last segment was a relative dir,
+        # then we need to append the trailing '/'
+        resolved_path.append("")
+
+    return resolved_path
+
+
+def normalize_path(path: str) -> str:
+    # Drop '.' and '..' from str path
+    prefix = ""
+    if path and path[0] == "/":
+        # preserve the "/" root element of absolute paths, copying it to the
+        # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
+        prefix = "/"
+        path = path[1:]
+
+    segments = path.split("/")
+    return prefix + "/".join(normalize_path_segments(segments))
diff --git a/yarl/_url.py b/yarl/_url.py
index 77193db08..966fd3743 100644
--- a/yarl/_url.py
+++ b/yarl/_url.py
@@ -2,7 +2,6 @@
 import sys
 import warnings
 from collections.abc import Mapping, Sequence
-from contextlib import suppress
 from functools import _CacheInfo, lru_cache
 from ipaddress import ip_address
 from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload
@@ -13,6 +12,7 @@
 from propcache.api import under_cached_property as cached_property
 
 from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result
+from ._path import normalize_path, normalize_path_segments
 from ._query import (
     Query,
     QueryVariable,
@@ -121,43 +121,6 @@ def rewrite_module(obj: _T) -> _T:
     return obj
 
 
-def _normalize_path_segments(segments: "Sequence[str]") -> list[str]:
-    """Drop '.' and '..' from a sequence of str segments"""
-
-    resolved_path: list[str] = []
-
-    for seg in segments:
-        if seg == "..":
-            # ignore any .. segments that would otherwise cause an
-            # IndexError when popped from resolved_path if
-            # resolving for rfc3986
-            with suppress(IndexError):
-                resolved_path.pop()
-        elif seg != ".":
-            resolved_path.append(seg)
-
-    if segments and segments[-1] in (".", ".."):
-        # do some post-processing here.
-        # if the last segment was a relative dir,
-        # then we need to append the trailing '/'
-        resolved_path.append("")
-
-    return resolved_path
-
-
-def _normalize_path(path: str) -> str:
-    # Drop '.' and '..' from str path
-    prefix = ""
-    if path and path[0] == "/":
-        # preserve the "/" root element of absolute paths, copying it to the
-        # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
-        prefix = "/"
-        path = path[1:]
-
-    segments = path.split("/")
-    return prefix + "/".join(_normalize_path_segments(segments))
-
-
 def _raise_for_authority_missing_abs_path() -> None:
     """Raise when he path in URL with authority starts lacks a leading slash."""
     msg = "Path in a URL with authority should start with a slash ('/') if set"
@@ -306,7 +269,7 @@ def __new__(
                 path = PATH_REQUOTER(path)
                 if netloc:
                     if "." in path:
-                        path = _normalize_path(path)
+                        path = normalize_path(path)
                     if path[0] != "/":
                         _raise_for_authority_missing_abs_path()
 
@@ -411,7 +374,7 @@ def build(
             path = PATH_QUOTER(path) if path else path
             if path and netloc:
                 if "." in path:
-                    path = _normalize_path(path)
+                    path = normalize_path(path)
                 if path[0] != "/":
                     _raise_for_authority_missing_abs_path()
 
@@ -964,7 +927,7 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
 
         if netloc := netloc:
             # If the netloc is present, we need to ensure that the path is normalized
-            parsed = _normalize_path_segments(parsed) if needs_normalize else parsed
+            parsed = normalize_path_segments(parsed) if needs_normalize else parsed
             if parsed and parsed[0] != "":
                 # inject a leading slash when adding a path to an absolute URL
                 # where there was none before
@@ -1082,7 +1045,7 @@ def with_path(self, path: str, *, encoded: bool = False) -> "URL":
         if not encoded:
             path = PATH_QUOTER(path)
             if netloc:
-                path = _normalize_path(path) if "." in path else path
+                path = normalize_path(path) if "." in path else path
         if path and path[0] != "/":
             path = f"/{path}"
         return self._from_tup((scheme, netloc, path, "", ""))
@@ -1325,7 +1288,7 @@ def join(self, url: "URL") -> "URL":
                 # which has to be removed
                 if orig_path[0] == "/":
                     path = path[1:]
-            path = _normalize_path(path) if "." in path else path
+            path = normalize_path(path) if "." in path else path
 
         return self._from_tup((scheme, orig_netloc, path, query, fragment))
 