python · geryogam · Sep 2, 2019 · Sep 2, 2019 · Sep 2, 2019 · Sep 2, 2019
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
@@ -239,10 +239,7 @@ or on combining URL components into a URL string.
 .. function:: urlunparse(parts)
 
    Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
-   argument can be any six-item iterable. This may result in a slightly
-   different, but equivalent URL, if the URL that was parsed originally had
-   unnecessary delimiters (for example, a ``?`` with an empty query; the RFC
-   states that these are equivalent).
+   argument can be any six-item iterable.
 
 
 .. function:: urlsplit(urlstring, scheme='', allow_fragments=True)
@@ -307,9 +304,7 @@ or on combining URL components into a URL string.
 
    Combine the elements of a tuple as returned by :func:`urlsplit` into a
    complete URL as a string. The *parts* argument can be any five-item
-   iterable. This may result in a slightly different, but equivalent URL, if the
-   URL that was parsed originally had unnecessary delimiters (for example, a ?
-   with an empty query; the RFC states that these are equivalent).
+   iterable.
 
 
 .. function:: urljoin(base, url, allow_fragments=True)

diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
@@ -1080,9 +1080,7 @@ def test_full_url_setter(self):
             parsed = urlparse(url)
 
             self.assertEqual(r.get_full_url(), url)
-            # full_url setter uses splittag to split into components.
-            # splittag sets the fragment as None while urlparse sets it to ''
-            self.assertEqual(r.fragment or '', parsed.fragment)
+            self.assertEqual(r.fragment, parsed.fragment)
             self.assertEqual(urlparse(r.get_full_url()).query, parsed.query)
 
     def test_full_url_deleter(self):

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -155,34 +155,34 @@ def test_qs(self):
     def test_roundtrips(self):
         str_cases = [
             ('file:///tmp/junk.txt',
-             ('file', '', '/tmp/junk.txt', '', '', ''),
-             ('file', '', '/tmp/junk.txt', '', '')),
+             ('file', '', '/tmp/junk.txt', '', None, None),
+             ('file', '', '/tmp/junk.txt', None, None)),
             ('imap://mail.python.org/mbox1',
-             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
-             ('imap', 'mail.python.org', '/mbox1', '', '')),
+             ('imap', 'mail.python.org', '/mbox1', '', None, None),
+             ('imap', 'mail.python.org', '/mbox1', None, None)),
             ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
-              '', '', ''),
+              '', None, None),
              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
-              '', '')),
+              None, None)),
             ('nfs://server/path/to/file.txt',
-             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
-             ('nfs', 'server', '/path/to/file.txt', '', '')),
+             ('nfs', 'server', '/path/to/file.txt', '', None, None),
+             ('nfs', 'server', '/path/to/file.txt', None, None)),
             ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
-              '', '', ''),
+              '', None, None),
              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
-              '', '')),
+              None, None)),
             ('git+ssh://[email protected]/user/project.git',
             ('git+ssh', '[email protected]','/user/project.git',
-             '','',''),
+             '', None, None),
             ('git+ssh', '[email protected]','/user/project.git',
-             '', '')),
+             None, None)),
             ]
         def _encode(t):
             return (t[0].encode('ascii'),
-                    tuple(x.encode('ascii') for x in t[1]),
-                    tuple(x.encode('ascii') for x in t[2]))
+                    tuple(x.encode('ascii') if x is not None else None for x in t[1]),
+                    tuple(x.encode('ascii') if x is not None else None for x in t[2]))
         bytes_cases = [_encode(x) for x in str_cases]
         for url, parsed, split in str_cases + bytes_cases:
             self.checkRoundtrips(url, parsed, split)
@@ -193,25 +193,34 @@ def test_http_roundtrips(self):
         # Three cheers for white box knowledge!
         str_cases = [
             ('://www.python.org',
-             ('www.python.org', '', '', '', ''),
-             ('www.python.org', '', '', '')),
+             ('www.python.org', '', '', None, None),
+             ('www.python.org', '', None, None)),
             ('://www.python.org#abc',
-             ('www.python.org', '', '', '', 'abc'),
-             ('www.python.org', '', '', 'abc')),
+             ('www.python.org', '', '', None, 'abc'),
+             ('www.python.org', '', None, 'abc')),
             ('://www.python.org?q=abc',
-             ('www.python.org', '', '', 'q=abc', ''),
-             ('www.python.org', '', 'q=abc', '')),
+             ('www.python.org', '', '', 'q=abc', None),
+             ('www.python.org', '', 'q=abc', None)),
             ('://www.python.org/#abc',
-             ('www.python.org', '/', '', '', 'abc'),
-             ('www.python.org', '/', '', 'abc')),
+             ('www.python.org', '/', '', None, 'abc'),
+             ('www.python.org', '/', None, 'abc')),
             ('://a/b/c/d;p?q#f',
              ('a', '/b/c/d', 'p', 'q', 'f'),
              ('a', '/b/c/d;p', 'q', 'f')),
+            ('://a/?',
+             ('a', '/', '', '', None),
+             ('a', '/', '', None)),
+            ('://a/#',
+             ('a', '/', '', None, ''),
+             ('a', '/', None, '')),
+            ('://a/?#',
+             ('a', '/', '', '', ''),
+             ('a', '/', '', '')),
             ]
         def _encode(t):
             return (t[0].encode('ascii'),
-                    tuple(x.encode('ascii') for x in t[1]),
-                    tuple(x.encode('ascii') for x in t[2]))
+                    tuple(x.encode('ascii') if x is not None else None for x in t[1]),
+                    tuple(x.encode('ascii') if x is not None else None for x in t[2]))
         bytes_cases = [_encode(x) for x in str_cases]
         str_schemes = ('http', 'https')
         bytes_schemes = (b'http', b'https')
@@ -290,7 +299,7 @@ def test_RFC1808(self):
     def test_RFC2368(self):
         # Issue 11467: path that starts with a number is not parsed correctly
         self.assertEqual(urllib.parse.urlparse('mailto:[email protected]'),
-                ('mailto', '', '[email protected]', '', '', ''))
+                ('mailto', '', '[email protected]', '', None, None))
 
     def test_RFC2396(self):
         # cases from RFC 2396
@@ -502,18 +511,18 @@ def _encode(t):
     def test_urldefrag(self):
         str_cases = [
             ('http://python.org#frag', 'http://python.org', 'frag'),
-            ('http://python.org', 'http://python.org', ''),
+            ('http://python.org', 'http://python.org', None),
             ('http://python.org/#frag', 'http://python.org/', 'frag'),
-            ('http://python.org/', 'http://python.org/', ''),
+            ('http://python.org/', 'http://python.org/', None),
             ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
-            ('http://python.org/?q', 'http://python.org/?q', ''),
+            ('http://python.org/?q', 'http://python.org/?q', None),
             ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
-            ('http://python.org/p?q', 'http://python.org/p?q', ''),
+            ('http://python.org/p?q', 'http://python.org/p?q', None),
             (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
-            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
+            (RFC2396_BASE, 'http://a/b/c/d;p?q', None),
         ]
         def _encode(t):
-            return type(t)(x.encode('ascii') for x in t)
+            return type(t)(x.encode('ascii') if x is not None else None for x in t)
         bytes_cases = [_encode(x) for x in str_cases]
         for url, defrag, frag in str_cases + bytes_cases:
             result = urllib.parse.urldefrag(url)
@@ -537,7 +546,7 @@ def test_urlsplit_attributes(self):
         self.assertEqual(p.scheme, "http")
         self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
         self.assertEqual(p.path, "/doc/")
-        self.assertEqual(p.query, "")
+        self.assertEqual(p.query, None)
         self.assertEqual(p.fragment, "frag")
         self.assertEqual(p.username, None)
         self.assertEqual(p.password, None)
@@ -584,7 +593,7 @@ def test_urlsplit_attributes(self):
         self.assertEqual(p.scheme, b"http")
         self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
         self.assertEqual(p.path, b"/doc/")
-        self.assertEqual(p.query, b"")
+        self.assertEqual(p.query, None)
         self.assertEqual(p.fragment, b"frag")
         self.assertEqual(p.username, None)
         self.assertEqual(p.password, None)
@@ -684,44 +693,44 @@ def test_attributes_without_netloc(self):
     def test_noslash(self):
         # Issue 1637: http://foo.com?query is legal
         self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
-                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
+                         ('http', 'example.com', '', '', 'blahblah=/foo', None))
         self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
-                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
+                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', None))
 
     def test_withoutscheme(self):
         # Test urlparse without scheme
         # Issue 754016: urlparse goes wrong with IP:port without scheme
         # RFC 1808 specifies that netloc should start with //, urlparse expects
         # the same, otherwise it classifies the portion of url as path.
         self.assertEqual(urllib.parse.urlparse("path"),
-                ('','','path','','',''))
+                ('','','path','',None,None))
         self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
-                ('','www.python.org:80','','','',''))
+                ('','www.python.org:80','','',None,None))
         self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
-                ('http','www.python.org:80','','','',''))
+                ('http','www.python.org:80','','',None,None))
         # Repeat for bytes input
         self.assertEqual(urllib.parse.urlparse(b"path"),
-                (b'',b'',b'path',b'',b'',b''))
+                (b'',b'',b'path',b'',None,None))
         self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
-                (b'',b'www.python.org:80',b'',b'',b'',b''))
+                (b'',b'www.python.org:80',b'',b'',None,None))
         self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
-                (b'http',b'www.python.org:80',b'',b'',b'',b''))
+                (b'http',b'www.python.org:80',b'',b'',None,None))
 
     def test_portseparator(self):
         # Issue 754016 makes changes for port separator ':' from scheme separator
         self.assertEqual(urllib.parse.urlparse("path:80"),
-                ('','','path:80','','',''))
-        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
-        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
+                ('','','path:80','',None,None))
+        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','',None,None))
+        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','',None,None))
         self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
-                ('http','www.python.org:80','','','',''))
+                ('http','www.python.org:80','','',None,None))
         # As usual, need to check bytes input as well
         self.assertEqual(urllib.parse.urlparse(b"path:80"),
-                (b'',b'',b'path:80',b'',b'',b''))
-        self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
-        self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
+                (b'',b'',b'path:80',b'',None,None))
+        self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',None,None))
+        self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',None,None))
         self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
-                (b'http',b'www.python.org:80',b'',b'',b'',b''))
+                (b'http',b'www.python.org:80',b'',b'',None,None))
 
     def test_usingsys(self):
         # Issue 3314: sys module is used in the error
@@ -730,23 +739,23 @@ def test_usingsys(self):
     def test_anyscheme(self):
         # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
         self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
-                         ('s3', 'foo.com', '/stuff', '', '', ''))
+                         ('s3', 'foo.com', '/stuff', '', None, None))
         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
-                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
+                         ('x-newscheme', 'foo.com', '/stuff', '', None, None))
         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                          ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
-                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
+                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', None))
 
         # And for bytes...
         self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
-                         (b's3', b'foo.com', b'/stuff', b'', b'', b''))
+                         (b's3', b'foo.com', b'/stuff', b'', None, None))
         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
-                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', None, None))
         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
-                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', None))
 
     def test_default_scheme(self):
         # Exercise the scheme parameter of urlparse() and urlsplit()
@@ -783,10 +792,10 @@ def test_parse_fragments(self):
                     attr = "path"
                 with self.subTest(url=url, function=func):
                     result = func(url, allow_fragments=False)
-                    self.assertEqual(result.fragment, "")
+                    self.assertEqual(result.fragment, None)
                     self.assertTrue(
                             getattr(result, attr).endswith("#" + expected_frag))
-                    self.assertEqual(func(url, "", False).fragment, "")
+                    self.assertEqual(func(url, "", False).fragment, None)
 
                     result = func(url, allow_fragments=True)
                     self.assertEqual(result.fragment, expected_frag)

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -105,7 +105,8 @@ def _encode_result(obj, encoding=_implicit_encoding,
 
 def _decode_args(args, encoding=_implicit_encoding,
                        errors=_implicit_errors):
-    return tuple(x.decode(encoding, errors) if x else '' for x in args)
+    return tuple(x.decode(encoding, errors) if x else None if x is None else ''
+                 for x in args)
 
 def _coerce_args(*args):
     # Invokes decode if necessary to create str args
@@ -129,15 +130,19 @@ class _ResultMixinStr(object):
     __slots__ = ()
 
     def encode(self, encoding='ascii', errors='strict'):
-        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))
+        return self._encoded_counterpart(*(x.encode(encoding, errors)
+                                           if x is not None else None
+                                           for x in self))
 
 
 class _ResultMixinBytes(object):
     """Standard approach to decoding parsed results from bytes to str"""
     __slots__ = ()
 
     def decode(self, encoding='ascii', errors='strict'):
-        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))
+        return self._decoded_counterpart(*(x.decode(encoding, errors)
+                                           if x is not None else None
+                                           for x in self))
 
 
 class _NetlocResultMixinBase(object):
@@ -428,7 +433,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
         return _coerce_result(cached)
     if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
         clear_cache()
-    netloc = query = fragment = ''
+    netloc = ''
+    query = fragment = None  # no components NOR DELIMITERS
     i = url.find(':')
     if i > 0:
         if url[:i] == 'http': # optimize the common case
@@ -484,20 +490,20 @@ def urlunparse(components):
 
 def urlunsplit(components):
     """Combine the elements of a tuple as returned by urlsplit() into a
-    complete URL as a string. The data argument can be any five-item iterable.
-    This may result in a slightly different, but equivalent URL, if the URL that
-    was parsed originally had unnecessary delimiters (for example, a ? with an
-    empty query; the RFC states that these are equivalent)."""
+    complete URL as a string. The data argument can be any five-item
+    iterable."""
     scheme, netloc, url, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
     if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
         if url and url[:1] != '/': url = '/' + url
         url = '//' + (netloc or '') + url
     if scheme:
         url = scheme + ':' + url
-    if query:
+    # keep the delimiter if present (even if the component is empty)
+    if query is not None:
         url = url + '?' + query
-    if fragment:
+    # keep the delimiter if present (even if the component is empty)
+    if fragment is not None:
         url = url + '#' + fragment
     return _coerce_result(url)
 
@@ -573,16 +579,17 @@ def urljoin(base, url, allow_fragments=True):
 def urldefrag(url):
     """Removes any existing fragment from URL.
 
-    Returns a tuple of the defragmented URL and the fragment.  If
-    the URL contained no fragments, the second element is the
-    empty string.
+    Returns a tuple of the defragmented URL and the fragment.
+    If the URL contained no fragment, the second element is None.
+    If the URL contained an empty fragment with its '#' delimiter, the second
+    element is the empty string.
     """
     url, _coerce_result = _coerce_args(url)
     if '#' in url:
         s, n, p, a, q, frag = urlparse(url)
-        defrag = urlunparse((s, n, p, a, q, ''))
+        defrag = urlunparse((s, n, p, a, q, None))
     else:
-        frag = ''
+        frag = None
         defrag = url
     return _coerce_result(DefragResult(defrag, frag))
 

diff --git a/Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst b/Misc/NEWS.d/next/Library/2019-09-10-17-01-35.bpo-37969.5Dz8e7.rst
@@ -0,0 +1,5 @@
+Make the :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlunsplit` 
+functions keep the ``?`` delimiter in a URI with an empty query component and 
+keep the ``#`` delimiter in a URI with an empty fragment component, as required 
+by `RFC 3986 <https://tools.ietf.org/html/rfc3986?#section-6.2.3>`_. Patch by 
+Géry Ogam.