Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Python parser chunked handling with multiple Transfer-Encoding values #8823

Merged
merged 5 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/8823.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed Python parser chunked handling with multiple Transfer-Encoding values -- by :user:`Dreamsorcerer`.
20 changes: 15 additions & 5 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,10 @@ def __init__(
self._headers_parser = HeadersParser(max_line_size, max_field_size, self.lax)

@abc.abstractmethod
def parse_message(self, lines: List[bytes]) -> _MsgT:
pass
def parse_message(self, lines: List[bytes]) -> _MsgT: ...
Dismissed Show dismissed Hide dismissed

# Subclass hook used by parse_headers(): given the raw Transfer-Encoding
# header value ``te``, report whether the message body uses chunked framing.
# Implementations may raise instead of returning False (the request parser
# raises BadHttpMessage when chunked is not the final coding).
@abc.abstractmethod
def _is_chunked_te(self, te: str) -> bool: ...
Dismissed Show dismissed Hide dismissed

def feed_eof(self) -> Optional[_MsgT]:
if self._payload_parser is not None:
Expand Down Expand Up @@ -525,10 +527,8 @@ def parse_headers(
# chunking
te = headers.get(hdrs.TRANSFER_ENCODING)
if te is not None:
if "chunked" == te.lower():
if self._is_chunked_te(te):
chunked = True
else:
raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

if hdrs.CONTENT_LENGTH in headers:
raise BadHttpMessage(
Expand Down Expand Up @@ -638,6 +638,12 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
url,
)

def _is_chunked_te(self, te: str) -> bool:
if te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked":
return True
# https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved


class HttpResponseParser(HttpParser[RawResponseMessage]):
"""Read response status line and headers.
Expand Down Expand Up @@ -723,6 +729,10 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
chunked,
)

def _is_chunked_te(self, te: str) -> bool:
# https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked"
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved


class HttpPayloadParser:
def __init__(
Expand Down
39 changes: 39 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
2**16,
max_line_size=8190,
max_field_size=8190,
read_until_eof=True,
)


Expand Down Expand Up @@ -532,6 +533,23 @@
parser.feed_data(text)


async def test_request_te_last_chunked(parser: HttpRequestParser) -> None:
    """A request whose final transfer coding is chunked is decoded as chunked."""
    raw_request = (
        b"GET /test HTTP/1.1\r\n"
        b"Transfer-Encoding: not, chunked\r\n"
        b"\r\n"
        b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
    )
    messages, _upgrade, _tail = parser.feed_data(raw_request)
    payload = messages[0][1]
    # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
    assert await payload.read() == b"Test"
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved


def test_request_te_first_chunked(parser: HttpRequestParser) -> None:
    """A request where chunked is not the final transfer coding is rejected."""
    raw_request = (
        b"GET /test HTTP/1.1\r\n"
        b"Transfer-Encoding: chunked, not\r\n"
        b"\r\n"
        b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
    )
    # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
    rejects_bad_te = pytest.raises(
        http_exceptions.BadHttpMessage,
        match="nvalid `Transfer-Encoding`",
    )
    with rejects_bad_te:
        parser.feed_data(raw_request)


def test_conn_upgrade(parser: HttpRequestParser) -> None:
text = (
b"GET /test HTTP/1.1\r\n"
Expand Down Expand Up @@ -1191,6 +1209,27 @@
response.feed_data(text)


async def test_http_response_parser_notchunked(
    response: HttpResponseParser,
) -> None:
    """A response whose transfer coding is not chunked is not chunk-decoded.

    The body is delivered verbatim once EOF is fed to the parser, so the
    chunk framing bytes show up unchanged in the payload.
    """
    text = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: notchunked\r\n\r\n1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
    messages, upgrade, tail = response.feed_data(text)
    response.feed_eof()

    # The RFC reference must be a comment; as a bare URL it is a syntax error.
    # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
    assert await messages[0][1].read() == b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved


async def test_http_response_parser_last_chunked(
    response: HttpResponseParser,
) -> None:
    """A response whose final transfer coding is chunked is decoded as chunked."""
    raw_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"Transfer-Encoding: not, chunked\r\n"
        b"\r\n"
        b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
    )
    messages, _upgrade, _tail = response.feed_data(raw_response)
    payload = messages[0][1]

    # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
    assert await payload.read() == b"Test"
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved


def test_http_response_parser_bad(response: HttpResponseParser) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
response.feed_data(b"HTT/1\r\n\r\n")
Expand Down