Skip to content

Commit

Permalink
Fix some errors in unquoter
Browse files Browse the repository at this point in the history
* No longer raise ValueError for % followed by non-hexdigits.
* Fix decoding of % followed by a space and a hexdigit.
  • Loading branch information
serhiy-storchaka committed Sep 28, 2020
1 parent f12b4d0 commit 8c432d3
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGES/516.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix ValueError raised when decoding a ``%`` that is not followed by two hexadecimal digits.
1 change: 1 addition & 0 deletions CHANGES/520.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix decoding of ``%`` followed by a space and a hexadecimal digit.
40 changes: 22 additions & 18 deletions tests/test_quoting.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,25 +182,29 @@ def test_unquoting(num, unquoter):
assert expect == result


@pytest.mark.xfail
# FIXME: Expected value should be the same as given.
# See https://url.spec.whatwg.org/#percent-encoded-bytes
def test_unquoting_bad_percent_escapes_1(unquoter):
assert "%" == unquoter()("%")


@pytest.mark.xfail
# FIXME: Expected value should be the same as given.
# See https://url.spec.whatwg.org/#percent-encoded-bytes
def test_unquoting_bad_percent_escapes_2(unquoter):
assert "%x" == unquoter()("%x")


@pytest.mark.xfail
# FIXME: Expected value should be the same as given.
# Expected value should be the same as given.
# See https://url.spec.whatwg.org/#percent-encoded-bytes
def test_unquoting_bad_percent_escapes_3(unquoter):
assert "%xa" == unquoter()("%xa")
@pytest.mark.parametrize(
("input", "expected"),
[
("%", "%"),
("%2", "%2"),
("%x", "%x"),
("%€", "%€"),
("%2x", "%2x"),
("%2 ", "%2 "),
("% 2", "% 2"),
("%xa", "%xa"),
("%%", "%%"),
("%%3f", "%?"),
("%2%", "%2%"),
("%2%3f", "%2?"),
("%x%3f", "%x?"),
("%€%3f", "%€?"),
],
)
def test_unquoting_bad_percent_escapes(unquoter, input, expected):
assert unquoter()(input) == expected


@pytest.mark.xfail
Expand Down
26 changes: 15 additions & 11 deletions yarl/_quoting_c.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -328,19 +328,17 @@ cdef class _Unquoter:
cdef str _do_unquote(self, str val):
if len(val) == 0:
return val
cdef str pct = ''
cdef str last_pct = ''
cdef bytearray pcts = bytearray()
cdef list ret = []
cdef str unquoted
for ch in val:
if pct:
pct += ch
if len(pct) == 3: # pragma: no branch # peephole optimizer
pcts.append(int(pct[1:], base=16))
last_pct = pct
pct = ''
continue
cdef Py_UCS4 ch = 0
cdef int idx = 0
cdef int length = len(val)

while idx < length:
ch = val[idx]
idx += 1
if pcts:
try:
unquoted = pcts.decode('utf8')
Expand All @@ -355,8 +353,14 @@ cdef class _Unquoter:
ret.append(unquoted)
del pcts[:]

if ch == '%':
pct = ch
if ch == '%' and idx <= length - 2:
ch = _restore_ch(val[idx], val[idx + 1])
if ch == <Py_UCS4>-1:
ret.append("%")
else:
pcts.append(ch)
last_pct = val[idx - 1 : idx + 2]
idx += 2
continue

if pcts:
Expand Down
28 changes: 16 additions & 12 deletions yarl/_quoting_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")


class _Quoter:
Expand Down Expand Up @@ -126,18 +127,13 @@ def __call__(self, val: Optional[str]) -> Optional[str]:
raise TypeError("Argument should be str")
if not val:
return ""
pct = ""
last_pct = ""
pcts = bytearray()
ret = []
for ch in val:
if pct:
pct += ch
if len(pct) == 3: # pragma: no branch # peephole optimizer
pcts.append(int(pct[1:], base=16))
last_pct = pct
pct = ""
continue
idx = 0
while idx < len(val):
ch = val[idx]
idx += 1
if pcts:
try:
unquoted = pcts.decode("utf8")
Expand All @@ -158,9 +154,17 @@ def __call__(self, val: Optional[str]) -> Optional[str]:
ret.append(unquoted)
del pcts[:]

if ch == "%":
pct = ch
continue
if ch == "%" and idx <= len(val) - 2:
pct = val[idx : idx + 2] # noqa: E203
if _IS_HEX_STR.fullmatch(pct):
try:
pcts.append(int(pct, base=16))
except ValueError:
ret.append("%")
else:
last_pct = "%" + pct
idx += 2
continue

if pcts:
ret.append(last_pct) # %F8ab
Expand Down

0 comments on commit 8c432d3

Please sign in to comment.