From 838e9340e5f4e7f2a1cb9404d0a75aabbedf663a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 14:03:18 +0200 Subject: [PATCH 01/17] Update parse.py --- Lib/urllib/parse.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b6608783a89471..16f0c1cbb9ccec 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -428,7 +428,7 @@ def urlsplit(url, scheme='', allow_fragments=True): return _coerce_result(cached) if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() - netloc = query = fragment = '' + netloc = query = fragment = None # no components NOR DELIMITERS i = url.find(':') if i > 0: if url[:i] == 'http': # optimize the common case @@ -495,9 +495,11 @@ def urlunsplit(components): url = '//' + (netloc or '') + url if scheme: url = scheme + ':' + url - if query: + # keep the delimiter if present (even if the component is empty) + if query is not None: url = url + '?' + query - if fragment: + # keep the delimiter if present (even if the component is empty) + if fragment is not None: url = url + '#' + fragment return _coerce_result(url) From 8bffcf3412a34b44f0dba24705922b45b2e265c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 14:16:54 +0200 Subject: [PATCH 02/17] Update parse.py --- Lib/urllib/parse.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 16f0c1cbb9ccec..c2998835848642 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -484,10 +484,8 @@ def urlunparse(components): def urlunsplit(components): """Combine the elements of a tuple as returned by urlsplit() into a - complete URL as a string. The data argument can be any five-item iterable. - This may result in a slightly different, but equivalent URL, if the URL that - was parsed originally had unnecessary delimiters (for example, a ? 
with an - empty query; the RFC states that these are equivalent).""" + complete URL as a string. The data argument can be any five-item + iterable.""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): From e7e1f7a4cea8abdd2ee28c86f937dcb693050995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 14:19:03 +0200 Subject: [PATCH 03/17] Update urllib.parse.rst --- Doc/library/urllib.parse.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 49276daa7ff43f..39237a609b72c4 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -239,10 +239,7 @@ or on combining URL components into a URL string. .. function:: urlunparse(parts) Construct a URL from a tuple as returned by ``urlparse()``. The *parts* - argument can be any six-item iterable. This may result in a slightly - different, but equivalent URL, if the URL that was parsed originally had - unnecessary delimiters (for example, a ``?`` with an empty query; the RFC - states that these are equivalent). + argument can be any six-item iterable. .. function:: urlsplit(urlstring, scheme='', allow_fragments=True) @@ -307,9 +304,7 @@ or on combining URL components into a URL string. Combine the elements of a tuple as returned by :func:`urlsplit` into a complete URL as a string. The *parts* argument can be any five-item - iterable. This may result in a slightly different, but equivalent URL, if the - URL that was parsed originally had unnecessary delimiters (for example, a ? - with an empty query; the RFC states that these are equivalent). + iterable. .. 
function:: urljoin(base, url, allow_fragments=True) From 050de62ef6b9e74315768af5c5e21c433642553f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 14:41:53 +0200 Subject: [PATCH 04/17] Update parse.py --- Lib/urllib/parse.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index c2998835848642..ad2844bdad8b94 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -428,7 +428,8 @@ def urlsplit(url, scheme='', allow_fragments=True): return _coerce_result(cached) if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() - netloc = query = fragment = None # no components NOR DELIMITERS + netloc = '' + query = fragment = None # no components NOR DELIMITERS i = url.find(':') if i > 0: if url[:i] == 'http': # optimize the common case From 4df550d8e482e29129ff9431dda65229a51d2aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 16:37:37 +0200 Subject: [PATCH 05/17] Update parse.py --- Lib/urllib/parse.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index ad2844bdad8b94..1d3d14db421c22 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -105,7 +105,8 @@ def _encode_result(obj, encoding=_implicit_encoding, def _decode_args(args, encoding=_implicit_encoding, errors=_implicit_errors): - return tuple(x.decode(encoding, errors) if x else '' for x in args) + return tuple(x.decode(encoding, errors) if x else None if x is None else '' + for x in args) def _coerce_args(*args): # Invokes decode if necessary to create str args @@ -129,7 +130,9 @@ class _ResultMixinStr(object): __slots__ = () def encode(self, encoding='ascii', errors='strict'): - return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + return self._encoded_counterpart(*(x.encode(encoding, errors) + if x is not None else None + for x in self)) class 
_ResultMixinBytes(object): @@ -137,7 +140,9 @@ class _ResultMixinBytes(object): __slots__ = () def decode(self, encoding='ascii', errors='strict'): - return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + return self._decoded_counterpart(*(x.decode(encoding, errors) + if x is not None else None + for x in self)) class _NetlocResultMixinBase(object): From 2dfecf282fc25a862c601af253a3222e91183622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 16:57:06 +0200 Subject: [PATCH 06/17] Update test_urllib2.py --- Lib/test/test_urllib2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 7b576db4e3aaf4..05618127433711 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1080,9 +1080,7 @@ def test_full_url_setter(self): parsed = urlparse(url) self.assertEqual(r.get_full_url(), url) - # full_url setter uses splittag to split into components. - # splittag sets the fragment as None while urlparse sets it to '' - self.assertEqual(r.fragment or '', parsed.fragment) + self.assertEqual(r.fragment, parsed.fragment) self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): From 98d25e71c20ca6a666f512879c0f05230a3c25bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 18:12:15 +0200 Subject: [PATCH 07/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 107 +++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4ae6ed33858ce2..d263ad687b25a5 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -155,34 +155,34 @@ def test_qs(self): def test_roundtrips(self): str_cases = [ ('file:///tmp/junk.txt', - ('file', '', '/tmp/junk.txt', '', '', ''), - ('file', '', '/tmp/junk.txt', '', '')), + ('file', '', '/tmp/junk.txt', '', None, None), 
+ ('file', '', '/tmp/junk.txt', None, None)), ('imap://mail.python.org/mbox1', - ('imap', 'mail.python.org', '/mbox1', '', '', ''), - ('imap', 'mail.python.org', '/mbox1', '', '')), + ('imap', 'mail.python.org', '/mbox1', '', None, None), + ('imap', 'mail.python.org', '/mbox1', None, None)), ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', - '', '', ''), + '', None, None), ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', - '', '')), + None, None)), ('nfs://server/path/to/file.txt', - ('nfs', 'server', '/path/to/file.txt', '', '', ''), - ('nfs', 'server', '/path/to/file.txt', '', '')), + ('nfs', 'server', '/path/to/file.txt', '', None, None), + ('nfs', 'server', '/path/to/file.txt', None, None)), ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', - '', '', ''), + '', None, None), ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', - '', '')), + None, None)), ('git+ssh://git@github.com/user/project.git', ('git+ssh', 'git@github.com','/user/project.git', - '','',''), + '', None, None), ('git+ssh', 'git@github.com','/user/project.git', - '', '')), + None, None)), ] def _encode(t): return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) + tuple(x.encode('ascii') if x is not None else None for x in t[1]), + tuple(x.encode('ascii') if x is not None else None for x in t[2])) bytes_cases = [_encode(x) for x in str_cases] for url, parsed, split in str_cases + bytes_cases: self.checkRoundtrips(url, parsed, split) @@ -193,25 +193,34 @@ def test_http_roundtrips(self): # Three cheers for white box knowledge! 
str_cases = [ ('://www.python.org', - ('www.python.org', '', '', '', ''), - ('www.python.org', '', '', '')), + ('www.python.org', '', '', None, None), + ('www.python.org', '', None, None)), ('://www.python.org#abc', - ('www.python.org', '', '', '', 'abc'), - ('www.python.org', '', '', 'abc')), + ('www.python.org', '', '', None, 'abc'), + ('www.python.org', '', None, 'abc')), ('://www.python.org?q=abc', - ('www.python.org', '', '', 'q=abc', ''), - ('www.python.org', '', 'q=abc', '')), + ('www.python.org', '', '', 'q=abc', None), + ('www.python.org', '', 'q=abc', None)), ('://www.python.org/#abc', - ('www.python.org', '/', '', '', 'abc'), - ('www.python.org', '/', '', 'abc')), + ('www.python.org', '/', '', None, 'abc'), + ('www.python.org', '/', None, 'abc')), ('://a/b/c/d;p?q#f', ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), + ('://a/?', + ('a', '/', '', '', None), + ('a', '/', '', None)), + ('://a/#', + ('a', '/', '', None, ''), + ('a', '/', None, '')), + ('://a/?#', + ('a', '/', '', '', ''), + ('a', '/', '', '')), ] def _encode(t): return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) + tuple(x.encode('ascii') if x is not None else None for x in t[1]), + tuple(x.encode('ascii') if x is not None else None for x in t[2])) bytes_cases = [_encode(x) for x in str_cases] str_schemes = ('http', 'https') bytes_schemes = (b'http', b'https') @@ -290,7 +299,7 @@ def test_RFC1808(self): def test_RFC2368(self): # Issue 11467: path that starts with a number is not parsed correctly self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'), - ('mailto', '', '1337@example.org', '', '', '')) + ('mailto', '', '1337@example.org', '', None, None)) def test_RFC2396(self): # cases from RFC 2396 @@ -684,9 +693,9 @@ def test_attributes_without_netloc(self): def test_noslash(self): # Issue 1637: http://foo.com?query is legal self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), - 
('http', 'example.com', '', '', 'blahblah=/foo', '')) + ('http', 'example.com', '', '', 'blahblah=/foo', None)) self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), - (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) + (b'http', b'example.com', b'', b'', b'blahblah=/foo', None)) def test_withoutscheme(self): # Test urlparse without scheme @@ -694,34 +703,34 @@ def test_withoutscheme(self): # RFC 1808 specifies that netloc should start with //, urlparse expects # the same, otherwise it classifies the portion of url as path. self.assertEqual(urllib.parse.urlparse("path"), - ('','','path','','','')) + ('','','path','',None,None)) self.assertEqual(urllib.parse.urlparse("//www.python.org:80"), - ('','www.python.org:80','','','','')) + ('','www.python.org:80','','',None,None)) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), - ('http','www.python.org:80','','','','')) + ('http','www.python.org:80','','',None,None)) # Repeat for bytes input self.assertEqual(urllib.parse.urlparse(b"path"), - (b'',b'',b'path',b'',b'',b'')) + (b'',b'',b'path',b'',None,None)) self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), - (b'',b'www.python.org:80',b'',b'',b'',b'')) + (b'',b'www.python.org:80',b'',b'',None,None)) self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), - (b'http',b'www.python.org:80',b'',b'',b'',b'')) + (b'http',b'www.python.org:80',b'',b'',None,None)) def test_portseparator(self): # Issue 754016 makes changes for port separator ':' from scheme separator self.assertEqual(urllib.parse.urlparse("path:80"), - ('','','path:80','','','')) - self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) - self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) + ('','','path:80','',None,None)) + self.assertEqual(urllib.parse.urlparse("http:"),('http','','','',None,None)) + self.assertEqual(urllib.parse.urlparse("https:"),('https','','','',None,None)) 
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), - ('http','www.python.org:80','','','','')) + ('http','www.python.org:80','','',None,None)) # As usual, need to check bytes input as well self.assertEqual(urllib.parse.urlparse(b"path:80"), - (b'',b'',b'path:80',b'',b'',b'')) - self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) - self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) + (b'',b'',b'path:80',b'',None,None)) + self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',None,None)) + self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',None,None)) self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), - (b'http',b'www.python.org:80',b'',b'',b'',b'')) + (b'http',b'www.python.org:80',b'',b'',None,None)) def test_usingsys(self): # Issue 3314: sys module is used in the error @@ -730,23 +739,23 @@ def test_usingsys(self): def test_anyscheme(self): # Issue 7904: s3://foo.com/stuff has netloc "foo.com". self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"), - ('s3', 'foo.com', '/stuff', '', '', '')) + ('s3', 'foo.com', '/stuff', '', None, None)) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), - ('x-newscheme', 'foo.com', '/stuff', '', '', '')) + ('x-newscheme', 'foo.com', '/stuff', '', None, None)) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"), - ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) + ('x-newscheme', 'foo.com', '/stuff', '', 'query', None)) # And for bytes... 
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), - (b's3', b'foo.com', b'/stuff', b'', b'', b'')) + (b's3', b'foo.com', b'/stuff', b'', None, None)) self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), - (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) + (b'x-newscheme', b'foo.com', b'/stuff', b'', None, None)) self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), - (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', None)) def test_default_scheme(self): # Exercise the scheme parameter of urlparse() and urlsplit() @@ -783,10 +792,10 @@ def test_parse_fragments(self): attr = "path" with self.subTest(url=url, function=func): result = func(url, allow_fragments=False) - self.assertEqual(result.fragment, "") + self.assertEqual(result.fragment, None) self.assertTrue( getattr(result, attr).endswith("#" + expected_frag)) - self.assertEqual(func(url, "", False).fragment, "") + self.assertEqual(func(url, "", False).fragment, None) result = func(url, allow_fragments=True) self.assertEqual(result.fragment, expected_frag) From a56112a9625d78c2da53d9532f540a92972fc165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 19:15:27 +0200 Subject: [PATCH 08/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d263ad687b25a5..dc35f1cf3cb188 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -546,7 +546,7 @@ def test_urlsplit_attributes(self): self.assertEqual(p.scheme, "http") self.assertEqual(p.netloc, "WWW.PYTHON.ORG") self.assertEqual(p.path, "/doc/") - self.assertEqual(p.query, "") + self.assertEqual(p.query, None) 
self.assertEqual(p.fragment, "frag") self.assertEqual(p.username, None) self.assertEqual(p.password, None) From 7dc5a12bde6044a0501db01327c85caaae8780a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 20:35:32 +0200 Subject: [PATCH 09/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index dc35f1cf3cb188..484e6a7bd67f2d 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -593,7 +593,7 @@ def test_urlsplit_attributes(self): self.assertEqual(p.scheme, b"http") self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") self.assertEqual(p.path, b"/doc/") - self.assertEqual(p.query, b"") + self.assertEqual(p.query, None) self.assertEqual(p.fragment, b"frag") self.assertEqual(p.username, None) self.assertEqual(p.password, None) From ad3994a5a0d26ea10a58ad6d2f5f3b43aafdc9e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 22:11:01 +0200 Subject: [PATCH 10/17] Update parse.py --- Lib/urllib/parse.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 1d3d14db421c22..29100bb20beed0 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -579,16 +579,17 @@ def urljoin(base, url, allow_fragments=True): def urldefrag(url): """Removes any existing fragment from URL. - Returns a tuple of the defragmented URL and the fragment. If - the URL contained no fragments, the second element is the - empty string. + Returns a tuple of the defragmented URL and the fragment. + If the URL contained no fragment, the second element is None. + If the URL contained an empty fragment with its '#' delimiter, the second + element is the empty string. 
""" url, _coerce_result = _coerce_args(url) if '#' in url: s, n, p, a, q, frag = urlparse(url) - defrag = urlunparse((s, n, p, a, q, '')) + defrag = urlunparse((s, n, p, a, q, None)) else: - frag = '' + frag = None defrag = url return _coerce_result(DefragResult(defrag, frag)) From 6d70b49aefecb2f9baa960ed9a663f732e631099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 22:59:39 +0200 Subject: [PATCH 11/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 484e6a7bd67f2d..dd948da7b5e0fa 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -511,15 +511,15 @@ def _encode(t): def test_urldefrag(self): str_cases = [ ('http://python.org#frag', 'http://python.org', 'frag'), - ('http://python.org', 'http://python.org', ''), + ('http://python.org', 'http://python.org', None), ('http://python.org/#frag', 'http://python.org/', 'frag'), - ('http://python.org/', 'http://python.org/', ''), + ('http://python.org/', 'http://python.org/', None), ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), - ('http://python.org/?q', 'http://python.org/?q', ''), + ('http://python.org/?q', 'http://python.org/?q', None), ('http://python.org/p#frag', 'http://python.org/p', 'frag'), - ('http://python.org/p?q', 'http://python.org/p?q', ''), + ('http://python.org/p?q', 'http://python.org/p?q', None), (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), - (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), + (RFC2396_BASE, 'http://a/b/c/d;p?q', None), ] def _encode(t): return type(t)(x.encode('ascii') for x in t) From 58947a58ad6029782c9bdaee13baaf19f50735eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 23:16:37 +0200 Subject: [PATCH 12/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index dd948da7b5e0fa..1ccfa8158ed382 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -523,7 +523,7 @@ def test_urldefrag(self): ] def _encode(t): return type(t)(x.encode('ascii') for x in t) - bytes_cases = [_encode(x) for x in str_cases] + bytes_cases = [_encode(x) if x is not None else None for x in str_cases] for url, defrag, frag in str_cases + bytes_cases: result = urllib.parse.urldefrag(url) self.assertEqual(result.geturl(), url) From db39e9694d6b902eb83b9140421120785418334d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Mon, 2 Sep 2019 23:35:53 +0200 Subject: [PATCH 13/17] Update test_urlparse.py --- Lib/test/test_urlparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 1ccfa8158ed382..26a45f3e1aa3d6 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -522,8 +522,8 @@ def test_urldefrag(self): (RFC2396_BASE, 'http://a/b/c/d;p?q', None), ] def _encode(t): - return type(t)(x.encode('ascii') for x in t) - bytes_cases = [_encode(x) if x is not None else None for x in str_cases] + return type(t)(x.encode('ascii') if x is not None else None for x in t) + bytes_cases = [_encode(x) for x in str_cases] for url, defrag, frag in str_cases + bytes_cases: result = urllib.parse.urldefrag(url) self.assertEqual(result.geturl(), url) From 67465d2145617b8ffc3bcc2f85078f9ec15b0fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Tue, 3 Sep 2019 00:04:23 +0200 Subject: [PATCH 14/17] Update test_urlparse.py --- Lib/urllib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 29100bb20beed0..a100b3284b06d0 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -582,7 +582,7 @@ def urldefrag(url): Returns a tuple of the defragmented URL and the fragment. 
If the URL contained no fragment, the second element is None. If the URL contained an empty fragment with its '#' delimiter, the second - element is the empty string. + element is the empty string. """ url, _coerce_result = _coerce_args(url) if '#' in url: From d6bd27fdebaa0cffb71a7c65a1692e225142280b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Tue, 10 Sep 2019 17:12:00 +0200 Subject: [PATCH 15/17] Create 2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst --- .../next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst diff --git a/Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst b/Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst new file mode 100644 index 00000000000000..248fcb74a1fb4b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst @@ -0,0 +1,5 @@ +Make the :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlunsplit` +functions keep the ``?`` delimiter in a URI with an empty query component and +keep the ``#`` delimiter in a URI with an empty fragment component, as required +by `RFC 3986 <https://www.rfc-editor.org/rfc/rfc3986>`_. Patch by +Géry Ogam. 
From 2c8a28636a408bead35e38a3634e6b10fb34c260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Fri, 2 Dec 2022 13:16:37 +0100 Subject: [PATCH 16/17] =?UTF-8?q?Use=20RFC=203986=20terminology=20(?= =?UTF-8?q?=E2=80=98undefined=E2=80=99)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/urllib/parse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 789fcf07e5ff04..c860aadae5e629 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -464,7 +464,7 @@ def urlsplit(url, scheme='', allow_fragments=True): allow_fragments = bool(allow_fragments) netloc = '' - query = fragment = None # the query and fragment are absent + query = fragment = None # the query and fragment are undefined i = url.find(':') if i > 0 and url[0].isascii() and url[0].isalpha(): for c in url[:i]: @@ -508,9 +508,9 @@ def urlunsplit(components): url = '//' + (netloc or '') + url if scheme: url = scheme + ':' + url - if query is not None: # the query is present + if query is not None: # the query is defined url = url + '?' + query - if fragment is not None: # the fragment is present + if fragment is not None: # the fragment is defined url = url + '#' + fragment return _coerce_result(url) From 8183cf7b0120afb2c91b41268df014e157944259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Fri, 2 Dec 2022 19:00:58 +0100 Subject: [PATCH 17/17] Update doc tests --- Doc/library/urllib.parse.rst | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 15c4f67442cf4c..d69386588e7b7c 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -70,7 +70,7 @@ or on combining URL components into a URL string. 
>>> o.port 80 >>> o._replace(fragment="").geturl() - 'http://docs.python.org:80/3/library/urllib.parse.html?highlight=params' + 'http://docs.python.org:80/3/library/urllib.parse.html?highlight=params#' Following the syntax specifications in :rfc:`1808`, urlparse recognizes a netloc only if it is properly introduced by '//'. Otherwise the @@ -83,13 +83,13 @@ or on combining URL components into a URL string. >>> from urllib.parse import urlparse >>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html') ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') + params='', query=None, fragment=None) >>> urlparse('www.cwi.nl/%7Eguido/Python.html') ParseResult(scheme='', netloc='', path='www.cwi.nl/%7Eguido/Python.html', - params='', query='', fragment='') + params='', query=None, fragment=None) >>> urlparse('help/Python.html') ParseResult(scheme='', netloc='', path='help/Python.html', params='', - query='', fragment='') + query=None, fragment=None) The *scheme* argument gives the default addressing scheme, to be used only if the URL does not specify one. It should be the same type @@ -154,10 +154,10 @@ or on combining URL components into a URL string. >>> u = urlparse('//www.cwi.nl:80/%7Eguido/Python.html') >>> u ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') + params='', query=None, fragment=None) >>> u._replace(scheme='http') ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') + params='', query=None, fragment=None) .. versionchanged:: 3.2 @@ -458,32 +458,28 @@ individual URL quoting functions. Structured Parse Results ------------------------ -The result objects from the :func:`urlparse`, :func:`urlsplit` and +The result objects from the :func:`urlparse`, :func:`urlsplit` and :func:`urldefrag` functions are subclasses of the :class:`tuple` type. 
These subclasses add the attributes listed in the documentation for those functions, the encoding and decoding support described in the previous section, as well as an additional method: -.. method:: urllib.parse.SplitResult.geturl() +.. method:: urllib.parse.ParseResult.geturl() Return the re-combined version of the original URL as a string. This may - differ from the original URL in that the scheme may be normalized to lower - case and empty components may be dropped. Specifically, empty parameters, - queries, and fragment identifiers will be removed. - - For :func:`urldefrag` results, only empty fragment identifiers will be removed. - For :func:`urlsplit` and :func:`urlparse` results, all noted changes will be - made to the URL returned by this method. + differ from the original URL in that the scheme will be normalized to lower + case for :func:`urlparse`, :func:`urlsplit` and :func:`urldefrag` results, + and empty parameters will be removed for :func:`urlparse` results. The result of this method remains unchanged if passed back through the original parsing function: - >>> from urllib.parse import urlsplit - >>> url = 'HTTP://www.Python.org/doc/#' - >>> r1 = urlsplit(url) + >>> from urllib.parse import urlparse + >>> url = 'HTTP://www.Python.org/doc/;' + >>> r1 = urlparse(url) >>> r1.geturl() 'http://www.Python.org/doc/' - >>> r2 = urlsplit(r1.geturl()) + >>> r2 = urlparse(r1.geturl()) >>> r2.geturl() 'http://www.Python.org/doc/'