From 5bb059fe606983814a445e4dcf9e96fd7cb4951a Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 25 Nov 2024 19:59:20 +0000 Subject: [PATCH] GH-127236: `pathname2url()`: generate RFC 1738 URL for absolute POSIX path (#127194) When handed an absolute Windows path such as `C:\foo` or `//server/share`, the `urllib.request.pathname2url()` function returns a URL with an authority section, such as `///C:/foo` or `//server/share` (or before GH-126205, `////server/share`). Only the `file:` prefix is omitted. But when handed an absolute POSIX path such as `/etc/hosts`, or a Windows path of the same form (rooted but lacking a drive), the function returns a URL without an authority section, such as `/etc/hosts`. This patch corrects the discrepancy by adding a `//` prefix before drive-less, rooted paths when generating URLs. --- Doc/library/urllib.request.rst | 10 ++++++---- Lib/nturl2path.py | 20 +++++++++++-------- Lib/test/test_urllib.py | 10 +++++----- Lib/urllib/request.py | 8 +++++--- ...-11-23-12-25-06.gh-issue-125866.wEOP66.rst | 5 +++++ 5 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 9055556a3703bb..3c07dc4adf434a 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions: 'file:///C:/Program%20Files' .. versionchanged:: 3.14 - Windows drive letters are no longer converted to uppercase. + Paths beginning with a slash are converted to URLs with authority + sections. For example, the path ``/etc/hosts`` is converted to + the URL ``///etc/hosts``. .. versionchanged:: 3.14 - On Windows, ``:`` characters not following a drive letter are quoted. In - previous versions, :exc:`OSError` was raised if a colon character was - found in any position other than the second character. + Windows drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an + :exc:`OSError` exception to be raised on Windows. .. function:: url2pathname(url) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 01135d1b7683b2..7e13ae3128333d 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -55,13 +55,17 @@ def pathname2url(p): p = p[4:] if p[:4].upper() == 'UNC/': p = '//' + p[4:] - drive, tail = ntpath.splitdrive(p) - if drive[1:] == ':': - # DOS drive specified. Add three slashes to the start, producing - # an authority section with a zero-length authority, and a path - # section starting with a single slash. - drive = f'///{drive}' + drive, root, tail = ntpath.splitroot(p) + if drive: + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. + drive = f'///{drive}' + drive = urllib.parse.quote(drive, safe='/:') + elif root: + # Add explicitly empty authority to path beginning with one slash. + root = f'//{root}' - drive = urllib.parse.quote(drive, safe='/:') tail = urllib.parse.quote(tail) - return drive + tail + return drive + root + tail diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index fe16badc5bc77d..00e46990c406ac 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1434,7 +1434,7 @@ def test_pathname2url_win(self): self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar') self.assertEqual(fn('foo:bar'), 'foo%3Abar') # No drive letter - self.assertEqual(fn("\\folder\\test\\"), '/folder/test/') + self.assertEqual(fn("\\folder\\test\\"), '///folder/test/') self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/') self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/') self.assertEqual(fn('\\\\some\\share\\'), '//some/share/') @@ -1447,7 +1447,7 @@ def test_pathname2url_win(self): self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir') # Round-tripping urls = ['///C:', - '/folder/test/', + '///folder/test/', '///C:/foo/bar/spam.foo'] for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) @@ -1456,12 +1456,12 @@ def test_pathname2url_win(self): 'test specific to POSIX pathnames') def test_pathname2url_posix(self): fn = urllib.request.pathname2url - self.assertEqual(fn('/'), '/') - self.assertEqual(fn('/a/b.c'), '/a/b.c') + self.assertEqual(fn('/'), '///') + self.assertEqual(fn('/a/b.c'), '///a/b.c') self.assertEqual(fn('//a/b.c'), '////a/b.c') self.assertEqual(fn('///a/b.c'), '/////a/b.c') self.assertEqual(fn('////a/b.c'), '//////a/b.c') - self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') + self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_pathname2url_nonascii(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 9e555432688a5b..1fcaa89188188d 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1667,9 +1667,11 @@ def url2pathname(pathname): def pathname2url(pathname): """OS-specific conversion from a file system path to a relative URL of the 'file' scheme; not recommended for general use.""" - if pathname[:2] == '//': - # Add explicitly empty authority to avoid interpreting the path - # as authority. + if pathname[:1] == '/': + # Add explicitly empty authority to absolute path. If the path + # starts with exactly one slash then this change is mostly + # cosmetic, but if it begins with two or more slashes then this + # avoids interpreting the path as a URL authority. pathname = '//' + pathname encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() diff --git a/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst new file mode 100644 index 00000000000000..0b8ffdb3901db3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst @@ -0,0 +1,5 @@ +:func:`urllib.request.pathname2url` now adds an empty authority when +generating a URL for a path that begins with exactly one slash. For example, +the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``. +As a result of this change, URLs without authorities are only generated for +relative paths.