Skip to content

Commit

Permalink
make hyperlink handle + like an HTML form post by default
Browse files Browse the repository at this point in the history
  • Loading branch information
glyph committed Dec 28, 2020
1 parent e5cd7e2 commit ad88c47
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 14 deletions.
70 changes: 57 additions & 13 deletions src/hyperlink/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
)
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc

# Schemes registered with query_plus_is_space=False.  DecodedURL consults
# this set when no explicit query_plus_is_space is given: for a scheme listed
# here, "+" in the query string is left as-is rather than decoded as a space.
NO_QUERY_PLUS_SCHEMES = set()

def register_scheme(text, uses_netloc=True, default_port=None):
# type: (Text, bool, Optional[int]) -> None

def register_scheme(
text, uses_netloc=True, default_port=None, query_plus_is_space=True
):
# type: (Text, bool, Optional[int], bool) -> None
"""Registers new scheme information, resulting in correct port and
slash behavior from the URL object. There are dozens of standard
schemes preregistered, so this function is mostly meant for
Expand All @@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
not. Defaults to True.
default_port: The default port, if any, for
netloc-using schemes.
query_plus_is_space: If true, a "+" in the query string should be
decoded as a space by DecodedURL.
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
"""
Expand All @@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
else:
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)

if not query_plus_is_space:
NO_QUERY_PLUS_SCHEMES.add(text)

return


Expand Down Expand Up @@ -1969,6 +1978,16 @@ def remove(
_EMPTY_URL = URL()


def _replace_plus(text):
# type: (Text) -> Text
return text.replace("+", "%20")


def _no_op(text):
# type: (Text) -> Text
return text


class DecodedURL(object):
"""
:class:`DecodedURL` is a type designed to act as a higher-level
Expand Down Expand Up @@ -1998,6 +2017,9 @@ class DecodedURL(object):
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
validity.
Defaults to False.
query_plus_is_space: + characters in the query string should be treated
as spaces when decoding. If unspecified, the default is taken from
the scheme.
.. note::
Expand All @@ -2012,18 +2034,21 @@ class DecodedURL(object):
.. versionadded:: 18.0.0
"""

def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    # url: the encoded URL being wrapped.
    # lazy: when false, every decodable part is decoded immediately so that
    #     decoding problems surface at construction time.
    # query_plus_is_space: whether "+" in the query decodes to a space;
    #     None means "decide from the URL's scheme".
    self._url = url
    if query_plus_is_space is None:
        # No explicit choice: "+" means space unless the scheme was
        # registered in NO_QUERY_PLUS_SCHEMES.
        query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
    # Must be set before the eager decode below, which reads it via
    # the ``query`` property.
    self._query_plus_is_space = query_plus_is_space
    if not lazy:
        # cache the following, while triggering any decoding
        # issues with decodable fields
        self.host, self.userinfo, self.path, self.query, self.fragment
    return

@classmethod
def from_text(cls, text, lazy=False):
# type: (Text, bool) -> DecodedURL
def from_text(cls, text, lazy=False, query_plus_is_space=None):
# type: (Text, bool, Optional[bool]) -> DecodedURL
"""\
Make a `DecodedURL` instance from any text string containing a URL.
Expand All @@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
Defaults to True.
"""
_url = URL.from_text(text)
return cls(_url, lazy=lazy)
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)

@property
def encoded_url(self):
Expand All @@ -2059,22 +2084,34 @@ def to_iri(self):
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
return self._url.to_iri()

def _clone(self, url):
# type: (URL) -> DecodedURL
return self.__class__(
url,
# TODO: propagate laziness?
query_plus_is_space=self._query_plus_is_space,
)

def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Return a new DecodedURL wrapping the result of
    :meth:`~hyperlink.URL.click()`

    The result is created through ``_clone`` so that it keeps this
    instance's ``query_plus_is_space`` setting.
    """
    if isinstance(href, DecodedURL):
        # URL.click() is given the wrapped encoded URL, not the wrapper.
        href = href._url
    return self._clone(self._url.click(href=href))

def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Automatically encode any reserved characters in *segment* and
    return a new `DecodedURL` wrapping the result of
    :meth:`~hyperlink.URL.sibling()`

    The result is created through ``_clone`` so that it keeps this
    instance's ``query_plus_is_space`` setting.
    """
    encoded_segment = _encode_reserved(segment)
    return self._clone(self._url.sibling(encoded_segment))

def child(self, *segments):
# type: (Text) -> DecodedURL
Expand All @@ -2085,7 +2122,7 @@ def child(self, *segments):
if not segments:
return self
new_segs = [_encode_reserved(s) for s in segments]
return self.__class__(self._url.child(*new_segs))
return self._clone(self._url.child(*new_segs))

def normalize(
self,
Expand All @@ -2101,7 +2138,7 @@ def normalize(
"""Return a new `DecodedURL` wrapping the result of
:meth:`~hyperlink.URL.normalize()`
"""
return self.__class__(
return self._clone(
self._url.normalize(
scheme, host, path, query, fragment, userinfo, percents
)
Expand Down Expand Up @@ -2148,11 +2185,18 @@ def path(self):
def query(self):
# type: () -> QueryPairs
if not hasattr(self, "_query"):
if self._query_plus_is_space:
predecode = _replace_plus
else:
predecode = _no_op

self._query = cast(
QueryPairs,
tuple(
tuple(
_percent_decode(x, raise_subencoding_exc=True)
_percent_decode(
predecode(x), raise_subencoding_exc=True
)
if x is not None
else None
for x in (k, v)
Expand Down Expand Up @@ -2248,7 +2292,7 @@ def replace(
userinfo=userinfo_text,
uses_netloc=uses_netloc,
)
return self.__class__(url=new_url)
return self._clone(url=new_url)

def get(self, name):
# type: (Text) -> List[Optional[Text]]
Expand Down
16 changes: 16 additions & 0 deletions src/hyperlink/test/test_decoded_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,19 @@ def test_click_decoded_url(self):
assert clicked.host == durl.host
assert clicked.path == durl_dest.path
assert clicked.path == ("tëst",)

def test_decode_plus(self):
    # type: () -> None
    """With default settings on a scheme-less URL, "+" in the query
    decodes to a space and "%2B" to "+", while "+" in the path stays "+".
    """
    durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
    assert durl.path == ("x+y+",)
    assert durl.get("a") == ["b c+"]
    assert durl.query == (("a", "b c+"),)

def test_decode_nonplussed(self):
    # type: () -> None
    """With ``query_plus_is_space=False``, "+" in the query is kept as a
    literal "+"; "%2B" still decodes to "+" and the path is unaffected.
    """
    durl = DecodedURL.from_text(
        "/x+y%2B?a=b+c%2B", query_plus_is_space=False
    )
    assert durl.path == ("x+y+",)
    assert durl.get("a") == ["b+c+"]
    assert durl.query == (("a", "b+c+"),)
12 changes: 11 additions & 1 deletion src/hyperlink/test/test_scheme_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .. import _url
from .common import HyperlinkTestCase
from .._url import register_scheme, URL
from .._url import register_scheme, URL, DecodedURL


class TestSchemeRegistration(HyperlinkTestCase):
Expand Down Expand Up @@ -70,3 +70,13 @@ def test_register_invalid_port(self):
# type: () -> None
with self.assertRaises(ValueError):
register_scheme("nope", default_port=cast(bool, object()))

def test_register_no_quote_plus_scheme(self):
    # type: () -> None
    """A scheme registered with ``query_plus_is_space=False`` keeps "+"
    literal in decoded query values, while "https" still decodes it as a
    space.
    """
    register_scheme("keepplus", query_plus_is_space=False)
    plus_is_not_space = DecodedURL.from_text(
        "keepplus://example.com/?q=a+b"
    )
    plus_is_space = DecodedURL.from_text("https://example.com/?q=a+b")
    assert plus_is_not_space.get("q") == ["a+b"]
    assert plus_is_space.get("q") == ["a b"]

0 comments on commit ad88c47

Please sign in to comment.