From e6695b013453bd2f0008d11cf12ee68e13e1aacd Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 11:16:03 -0500 Subject: [PATCH 01/26] Fix py parser not treating 204/304/1xx as an empty body 204/304/1xx are now always treated as an empty body in the py parser to match the c parser and comply with https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 --- aiohttp/http_parser.py | 62 ++++++++++--------- tests/test_http_parser.py | 126 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 28 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 34f4d040c03..ca2b21b7cc7 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -338,12 +338,19 @@ def get_content_length() -> Optional[int]: self._upgraded = msg.upgrade method = getattr(msg, "method", self.method) + # code is only present on responses + code = getattr(msg, "code", 0) assert self.protocol is not None # calculate payload + # 204, 304, 1xx should not have a body per + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + code_indicates_empty_body = ( + code in (204, 304) or 100 <= code < 200 + ) if ( (length is not None and length > 0) - or msg.chunked + or (msg.chunked and not code_indicates_empty_body) and not msg.upgrade ): payload = StreamReader( @@ -383,34 +390,33 @@ def get_content_length() -> Optional[int]: auto_decompress=self._auto_decompress, lax=self.lax, ) + elif ( + not code_indicates_empty_body + and length is None + and self.read_until_eof + ): + payload = StreamReader( + self.protocol, + timer=self.timer, + loop=loop, + limit=self._limit, + ) + payload_parser = HttpPayloadParser( + payload, + length=length, + chunked=msg.chunked, + method=method, + compression=msg.compression, + code=self.code, + readall=True, + response_with_body=self.response_with_body, + auto_decompress=self._auto_decompress, + lax=self.lax, + ) + if not payload_parser.done: + self._payload_parser = payload_parser else: - if ( - getattr(msg, "code", 100) >= 199 - and length is None - and self.read_until_eof - ): - payload = StreamReader( - self.protocol, - timer=self.timer, - loop=loop, - limit=self._limit, - ) - payload_parser = HttpPayloadParser( - payload, - length=length, - chunked=msg.chunked, - method=method, - compression=msg.compression, - code=self.code, - readall=True, - response_with_body=self.response_with_body, - auto_decompress=self._auto_decompress, - lax=self.lax, - ) - if not payload_parser.done: - self._payload_parser = payload_parser - else: - payload = EMPTY_PAYLOAD + payload = EMPTY_PAYLOAD messages.append((msg, payload)) else: diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 8b4121be87d..880a10e0b59 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -19,6 +19,7 @@ HttpPayloadParser, HttpRequestParserPy, HttpResponseParserPy, + HttpVersion, ) try: @@ -1060,6 +1061,131 @@ def test_parse_no_length_payload(parser: Any) -> None: assert payload.is_eof() +def test_parse_content_length_payload_multiple(response: Any) -> None: + text = b"HTTP/1.1 200 OK\r\n" b"content-length: 5\r\n\r\n" b"first" + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == 200 + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Content-Length", "5"), + ] + ) + assert msg.raw_headers == ((b"content-length", b"5"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert not msg.chunked + assert payload.is_eof() + assert b"first" == b"".join(d for d in payload._buffer) + + text = b"HTTP/1.1 200 OK\r\n" b"content-length: 6\r\n\r\n" b"second" + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == 200 + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Content-Length", "6"), + ] + ) + assert msg.raw_headers == ((b"content-length", b"6"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert not msg.chunked + assert payload.is_eof() + assert b"second" == b"".join(d for d in payload._buffer) + + +def test_parse_content_length_than_chunked_payload(response: Any) -> None: + text = b"HTTP/1.1 200 OK\r\n" b"content-length: 5\r\n\r\n" b"first" + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == 200 + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Content-Length", "5"), + ] + ) + assert msg.raw_headers == ((b"content-length", b"5"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert not msg.chunked + assert payload.is_eof() + assert b"first" == b"".join(d for d in payload._buffer) + + text = ( + b"HTTP/1.1 200 OK\r\n" + b"transfer-encoding: chunked\r\n\r\n" + b"6\r\nsecond\r\n0\r\n\r\n" + ) + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == 200 + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Transfer-Encoding", "chunked"), + ] + ) + assert msg.raw_headers == ((b"transfer-encoding", b"chunked"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert msg.chunked + assert payload.is_eof() + assert b"second" == b"".join(d for d in payload._buffer) + + +@pytest.mark.parametrize("code", [204, 304, 101, 102]) +def test_parse_chunked_payload_empty_body_than_another_chunked( + response: Any, code: int +) -> None: + head = f"HTTP/1.1 {code} OK\r\n".encode() + text = head + b"transfer-encoding: chunked\r\n\r\n" + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == code + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Transfer-Encoding", "chunked"), + ] + ) + assert msg.raw_headers == ((b"transfer-encoding", b"chunked"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert msg.chunked + assert payload.is_eof() + + text = ( + b"HTTP/1.1 200 OK\r\n" + b"transfer-encoding: chunked\r\n\r\n" + b"6\r\nsecond\r\n0\r\n\r\n" + ) + msg, payload = response.feed_data(text)[0][0] + assert msg.version == HttpVersion(major=1, minor=1) + assert msg.code == 200 + assert msg.reason == "OK" + assert msg.headers == CIMultiDict( + [ + ("Transfer-Encoding", "chunked"), + ] + ) + assert msg.raw_headers == ((b"transfer-encoding", b"chunked"),) + assert not msg.should_close + assert msg.compression is None + assert not msg.upgrade + assert msg.chunked + assert payload.is_eof() + assert b"second" == b"".join(d for d in payload._buffer) + + def test_partial_url(parser: Any) -> None: messages, upgrade, tail = parser.feed_data(b"GET /te") assert len(messages) == 0 From 4e2fed0dcc5d0d91cc6d455006989bc8e28ce1f4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 15:11:29 -0500 Subject: [PATCH 02/26] review --- aiohttp/http_parser.py | 5 +++-- tests/test_http_parser.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index ca2b21b7cc7..b33a9a4d331 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -349,8 +349,9 @@ def get_content_length() -> Optional[int]: code in (204, 304) or 100 <= code < 200 ) if ( - (length is not None and length > 0) - or (msg.chunked and not code_indicates_empty_body) + not code_indicates_empty_body + and (length is not None and length > 0) + or msg.chunked and not msg.upgrade ): payload = StreamReader( diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 880a10e0b59..b5915df463c 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -1062,7 +1062,7 @@ def test_parse_no_length_payload(parser: Any) -> None: def test_parse_content_length_payload_multiple(response: Any) -> None: - text = b"HTTP/1.1 200 OK\r\n" b"content-length: 5\r\n\r\n" b"first" + text = b"HTTP/1.1 200 OK\r\ncontent-length: 5\r\n\r\nfirst" msg, payload = response.feed_data(text)[0][0] assert msg.version == HttpVersion(major=1, minor=1) assert msg.code == 200 @@ -1080,7 +1080,7 @@ def test_parse_content_length_payload_multiple(response: Any) -> None: assert payload.is_eof() assert b"first" == b"".join(d for d in payload._buffer) - text = b"HTTP/1.1 200 OK\r\n" b"content-length: 6\r\n\r\n" b"second" + text = b"HTTP/1.1 200 OK\r\ncontent-length: 6\r\n\r\nsecond" msg, payload = response.feed_data(text)[0][0] assert msg.version == HttpVersion(major=1, minor=1) assert msg.code == 200 @@ -1100,7 +1100,7 @@ def test_parse_content_length_payload_multiple(response: Any) -> None: def test_parse_content_length_than_chunked_payload(response: Any) -> None: - text = b"HTTP/1.1 200 OK\r\n" b"content-length: 5\r\n\r\n" b"first" + text = b"HTTP/1.1 200 OK\r\ncontent-length: 5\r\n\r\nfirst" msg, payload = response.feed_data(text)[0][0] assert msg.version == HttpVersion(major=1, minor=1) assert msg.code == 200 From 6cae8460fe620ba1d02f519c267e30b8c708d7ce Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 15:16:48 -0500 Subject: [PATCH 03/26] adjust --- aiohttp/http_parser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index b33a9a4d331..64f0eefbbc5 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -350,10 +350,8 @@ def get_content_length() -> Optional[int]: ) if ( not code_indicates_empty_body - and (length is not None and length > 0) - or msg.chunked - and not msg.upgrade - ): + and ((length is not None and length > 0) or msg.chunked) + ) and not msg.upgrade: payload = StreamReader( self.protocol, timer=self.timer, From aa2c96399af36980aa2b8096eca425196e41cf49 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 16:13:07 -0500 Subject: [PATCH 04/26] refactor --- aiohttp/http_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 64f0eefbbc5..75210f62859 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -351,7 +351,8 @@ def get_content_length() -> Optional[int]: if ( not code_indicates_empty_body and ((length is not None and length > 0) or msg.chunked) - ) and not msg.upgrade: + and not msg.upgrade + ): payload = StreamReader( self.protocol, timer=self.timer, From 57f786ff7186a45784ae0d861ee51ee9a5e6b50b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 16:29:41 -0500 Subject: [PATCH 05/26] make msg.upgrade checked only with msg.chunked as before --- aiohttp/http_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 75210f62859..d233b8917fd 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -348,10 +348,9 @@ def get_content_length() -> Optional[int]: code_indicates_empty_body = ( code in (204, 304) or 100 <= code < 200 ) - if ( - not code_indicates_empty_body - and ((length is not None and length > 0) or msg.chunked) - and not msg.upgrade + if not code_indicates_empty_body and ( + (length is not None and length > 0) + or (msg.chunked and not msg.upgrade) ): payload = StreamReader( self.protocol, From 434f39c5f5bba457b53ac87d86fb59688893d6f9 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 27 Oct 2023 16:33:34 -0500 Subject: [PATCH 06/26] keep it the same as much as possible --- aiohttp/http_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index d233b8917fd..61d0cb1984b 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -350,7 +350,8 @@ def get_content_length() -> Optional[int]: ) if not code_indicates_empty_body and ( (length is not None and length > 0) - or (msg.chunked and not msg.upgrade) + or msg.chunked + and not msg.upgrade ): payload = StreamReader( self.protocol, From 68c5bc2fbceb39a04c7b7abeaa4509bdac989eea Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 28 Oct 2023 12:59:27 -0500 Subject: [PATCH 07/26] must_be_empty_body --- aiohttp/http_parser.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 61d0cb1984b..369e3b43dd1 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -345,10 +345,8 @@ def get_content_length() -> Optional[int]: # calculate payload # 204, 304, 1xx should not have a body per # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - code_indicates_empty_body = ( - code in (204, 304) or 100 <= code < 200 - ) - if not code_indicates_empty_body and ( + must_be_empty_body = code in (204, 304) or 100 <= code < 200 + if not must_be_empty_body and ( (length is not None and length > 0) or msg.chunked and not msg.upgrade @@ -391,7 +389,7 @@ def get_content_length() -> Optional[int]: lax=self.lax, ) elif ( - not code_indicates_empty_body + not must_be_empty_body and length is None and self.read_until_eof ): From 52f75759f6a7bad041439de62b71bc9bc5726f22 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 28 Oct 2023 14:19:40 -0500 Subject: [PATCH 08/26] match must_be_empty_body --- aiohttp/http_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 369e3b43dd1..b2407914be9 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -345,7 +345,11 @@ def get_content_length() -> Optional[int]: # calculate payload # 204, 304, 1xx should not have a body per # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - must_be_empty_body = code in (204, 304) or 100 <= code < 200 + must_be_empty_body = ( + method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) + or code in (204, 304) + or 100 <= code < 200 + ) if not must_be_empty_body and ( (length is not None and length > 0) or msg.chunked From 27968853baa8e4dfc6cd3e6eb5907afe458cb9fd Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 28 Oct 2023 14:20:24 -0500 Subject: [PATCH 09/26] match must_be_empty_body --- aiohttp/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiohttp/client.py b/aiohttp/client.py index 1d9d9fe94d1..ce033af2f8c 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -526,7 +526,8 @@ async def _request( assert conn.protocol is not None conn.protocol.set_response_params( timer=timer, - skip_payload=method.upper() == "HEAD", + skip_payload=method.upper() + in (hdrs.METH_CONNECT, hdrs.METH_HEAD), read_until_eof=read_until_eof, auto_decompress=auto_decompress, read_timeout=real_timeout.sock_read, From b5479e66556db9baa7934bf09e1fa81f480333e6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 28 Oct 2023 14:21:11 -0500 Subject: [PATCH 10/26] match must_be_empty_body --- aiohttp/client_proto.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiohttp/client_proto.py b/aiohttp/client_proto.py index 3d42cf5058a..aa3c129ceae 100644 --- a/aiohttp/client_proto.py +++ b/aiohttp/client_proto.py @@ -248,7 +248,11 @@ def data_received(self, data: bytes) -> None: self._payload = payload - if self._skip_payload or message.code in (204, 304): + if ( + self._skip_payload + or message.code in (204, 304) + or 100 <= message.code < 200 + ): self.feed_data((message, EMPTY_PAYLOAD), 0) else: self.feed_data((message, payload), 0) From c001a19e2f4a2624a217bf973258efc17bdff5bc Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 28 Oct 2023 14:44:24 -0500 Subject: [PATCH 11/26] move to helpers --- aiohttp/client.py | 4 ++-- aiohttp/client_proto.py | 13 ++++++++----- aiohttp/helpers.py | 19 +++++++++++++++++++ aiohttp/http_parser.py | 18 ++++-------------- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/aiohttp/client.py b/aiohttp/client.py index ce033af2f8c..8e4168c27bf 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -81,6 +81,7 @@ TimeoutHandle, ceil_timeout, get_env_proxy_for_url, + method_must_be_empty_body, sentinel, strip_auth_from_url, ) @@ -526,8 +527,7 @@ async def _request( assert conn.protocol is not None conn.protocol.set_response_params( timer=timer, - skip_payload=method.upper() - in (hdrs.METH_CONNECT, hdrs.METH_HEAD), + skip_payload=method_must_be_empty_body(method.upper()), read_until_eof=read_until_eof, auto_decompress=auto_decompress, read_timeout=real_timeout.sock_read, diff --git a/aiohttp/client_proto.py b/aiohttp/client_proto.py index aa3c129ceae..c42c9540d59 100644 --- a/aiohttp/client_proto.py +++ b/aiohttp/client_proto.py @@ -9,7 +9,12 @@ ServerDisconnectedError, ServerTimeoutError, ) -from .helpers import BaseTimerContext, set_exception, set_result +from .helpers import ( + BaseTimerContext, + set_exception, + set_result, + status_code_must_be_empty_body, +) from .http import HttpResponseParser, RawResponseMessage, WebSocketReader from .streams import EMPTY_PAYLOAD, DataQueue, StreamReader @@ -248,10 +253,8 @@ def data_received(self, data: bytes) -> None: self._payload = payload - if ( - self._skip_payload - or message.code in (204, 304) - or 100 <= message.code < 200 + if self._skip_payload or status_code_must_be_empty_body( + message.code ): self.feed_data((message, EMPTY_PAYLOAD), 0) else: diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index cbed287ce47..45a5e4b86eb 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1063,3 +1063,22 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: with suppress(ValueError): return datetime.datetime(*timetuple[:6], tzinfo=datetime.timezone.utc) return None + + +def must_be_empty_body(method: str, code: int) -> bool: + """Check if a request must return an empty body.""" + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) + + +def method_must_be_empty_body(method: str) -> bool: + """Check if a method must return an empty body.""" + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + return method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) + + +def status_code_must_be_empty_body(code: int) -> bool: + """Check if a status code must return an empty body.""" + # 204, 304, 1xx should not have a body per + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + return code in (204, 304) or 100 <= code < 200 diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index b2407914be9..76640b8b6e1 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -27,7 +27,7 @@ from . import hdrs from .base_protocol import BaseProtocol from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor -from .helpers import DEBUG, NO_EXTENSIONS, BaseTimerContext +from .helpers import DEBUG, NO_EXTENSIONS, BaseTimerContext, must_be_empty_body from .http_exceptions import ( BadHttpMessage, BadStatusLine, @@ -343,14 +343,8 @@ def get_content_length() -> Optional[int]: assert self.protocol is not None # calculate payload - # 204, 304, 1xx should not have a body per - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - must_be_empty_body = ( - method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) - or code in (204, 304) - or 100 <= code < 200 - ) - if not must_be_empty_body and ( + empty_body = must_be_empty_body(method, code) + if not empty_body and ( (length is not None and length > 0) or msg.chunked and not msg.upgrade @@ -392,11 +386,7 @@ def get_content_length() -> Optional[int]: auto_decompress=self._auto_decompress, lax=self.lax, ) - elif ( - not must_be_empty_body - and length is None - and self.read_until_eof - ): + elif not empty_body and length is None and self.read_until_eof: payload = StreamReader( self.protocol, timer=self.timer, From 9d2ebbdf51b6f9f59a101985c0c54faf707dcc78 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sun, 29 Oct 2023 13:03:20 +0000 Subject: [PATCH 12/26] Tweak comments --- aiohttp/helpers.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 45a5e4b86eb..8e71861170e 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1067,18 +1067,17 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: def must_be_empty_body(method: str, code: int) -> bool: """Check if a request must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) def method_must_be_empty_body(method: str) -> bool: """Check if a method must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2 return method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) def status_code_must_be_empty_body(code: int) -> bool: """Check if a status code must return an empty body.""" - # 204, 304, 1xx should not have a body per - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - return code in (204, 304) or 100 <= code < 200 + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 + return code in {204, 304} or 100 <= code < 200 From d1fb2c103a9be5683a93e3106e06247bca0b3c5e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 29 Oct 2023 06:57:23 -0700 Subject: [PATCH 13/26] move upper --- aiohttp/client.py | 2 +- aiohttp/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/client.py b/aiohttp/client.py index 8e4168c27bf..4bde51e5b6b 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -527,7 +527,7 @@ async def _request( assert conn.protocol is not None conn.protocol.set_response_params( timer=timer, - skip_payload=method_must_be_empty_body(method.upper()), + skip_payload=method_must_be_empty_body(method), read_until_eof=read_until_eof, auto_decompress=auto_decompress, read_timeout=real_timeout.sock_read, diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 8e71861170e..40c20a86776 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1074,7 +1074,7 @@ def method_must_be_empty_body(method: str) -> bool: """Check if a method must return an empty body.""" # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2 - return method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) + return method.upper() in (hdrs.METH_CONNECT, hdrs.METH_HEAD) def status_code_must_be_empty_body(code: int) -> bool: From 998a875c3eb2e9b382c82a37aedef330f797d357 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 02:58:44 -0700 Subject: [PATCH 14/26] typing --- aiohttp/http_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 76640b8b6e1..e1403635ab5 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -337,9 +337,9 @@ def get_content_length() -> Optional[int]: self._upgraded = msg.upgrade - method = getattr(msg, "method", self.method) + method: str = getattr(msg, "method", self.method) # code is only present on responses - code = getattr(msg, "code", 0) + code: int = getattr(msg, "code", 0) assert self.protocol is not None # calculate payload From f459e8e6b678b76f7db170e3273af0a1973bd5a2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 02:59:24 -0700 Subject: [PATCH 15/26] sync helpers --- aiohttp/helpers.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 40c20a86776..45a5e4b86eb 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1067,17 +1067,18 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: def must_be_empty_body(method: str, code: int) -> bool: """Check if a request must return an empty body.""" + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) def method_must_be_empty_body(method: str) -> bool: """Check if a method must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2 - return method.upper() in (hdrs.METH_CONNECT, hdrs.METH_HEAD) + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + return method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) def status_code_must_be_empty_body(code: int) -> bool: """Check if a status code must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 - return code in {204, 304} or 100 <= code < 200 + # 204, 304, 1xx should not have a body per + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 + return code in (204, 304) or 100 <= code < 200 From 9bde3df1451b3c8c860752f9e21d2cab09a0ed85 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:07:00 -0700 Subject: [PATCH 16/26] mypy --- aiohttp/http_parser.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index e1403635ab5..dd03ab2a99f 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -5,6 +5,7 @@ from contextlib import suppress from enum import IntEnum from typing import ( + TYPE_CHECKING, Any, ClassVar, Final, @@ -337,9 +338,13 @@ def get_content_length() -> Optional[int]: self._upgraded = msg.upgrade - method: str = getattr(msg, "method", self.method) + method = getattr(msg, "method", self.method) + if TYPE_CHECKING: + assert isinstance(method, str) # code is only present on responses - code: int = getattr(msg, "code", 0) + code = getattr(msg, "code", 0) + if TYPE_CHECKING: + assert isinstance(code, int) assert self.protocol is not None # calculate payload From 5ff928aba5ba510723728a8d45b40a833bbd8b8a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:16:02 -0700 Subject: [PATCH 17/26] remove if TYPE_CHECKING since it does not work here --- aiohttp/http_parser.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index dd03ab2a99f..d31bc2577a6 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -5,7 +5,6 @@ from contextlib import suppress from enum import IntEnum from typing import ( - TYPE_CHECKING, Any, ClassVar, Final, @@ -20,6 +19,7 @@ Type, TypeVar, Union, + cast, ) from multidict import CIMultiDict, CIMultiDictProxy, istr @@ -339,12 +339,10 @@ def get_content_length() -> Optional[int]: self._upgraded = msg.upgrade method = getattr(msg, "method", self.method) - if TYPE_CHECKING: - assert isinstance(method, str) + # assert method is not None + method = cast(str, method) # code is only present on responses code = getattr(msg, "code", 0) - if TYPE_CHECKING: - assert isinstance(code, int) assert self.protocol is not None # calculate payload From b113d0895725f2796dfd10305c651001edc37caf Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:32:40 -0700 Subject: [PATCH 18/26] Revert "sync helpers" This reverts commit f459e8e6b678b76f7db170e3273af0a1973bd5a2. --- aiohttp/helpers.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 45a5e4b86eb..40c20a86776 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1067,18 +1067,17 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: def must_be_empty_body(method: str, code: int) -> bool: """Check if a request must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) def method_must_be_empty_body(method: str) -> bool: """Check if a method must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - return method in (hdrs.METH_CONNECT, hdrs.METH_HEAD) + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2 + return method.upper() in (hdrs.METH_CONNECT, hdrs.METH_HEAD) def status_code_must_be_empty_body(code: int) -> bool: """Check if a status code must return an empty body.""" - # 204, 304, 1xx should not have a body per - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - return code in (204, 304) or 100 <= code < 200 + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 + return code in {204, 304} or 100 <= code < 200 From a2ec23fd0adf4d6498f55426315b3c4caaad80b9 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:33:38 -0700 Subject: [PATCH 19/26] merge helper comments from other PR --- aiohttp/helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 40c20a86776..ec7ae175fea 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1067,6 +1067,7 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: def must_be_empty_body(method: str, code: int) -> bool: """Check if a request must return an empty body.""" + # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) @@ -1079,5 +1080,6 @@ def method_must_be_empty_body(method: str) -> bool: def status_code_must_be_empty_body(code: int) -> bool: """Check if a status code must return an empty body.""" + # 204, 304, 1xx should not have a body per # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 return code in {204, 304} or 100 <= code < 200 From 046e29252f487c18482036ba63a0a8554b5458f0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:49:39 -0700 Subject: [PATCH 20/26] method not always there --- aiohttp/http_parser.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index d31bc2577a6..28d9172bfa4 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -19,7 +19,6 @@ Type, TypeVar, Union, - cast, ) from multidict import CIMultiDict, CIMultiDictProxy, istr @@ -28,7 +27,13 @@ from . import hdrs from .base_protocol import BaseProtocol from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor -from .helpers import DEBUG, NO_EXTENSIONS, BaseTimerContext, must_be_empty_body +from .helpers import ( + DEBUG, + NO_EXTENSIONS, + BaseTimerContext, + method_must_be_empty_body, + status_code_must_be_empty_body, +) from .http_exceptions import ( BadHttpMessage, BadStatusLine, @@ -339,14 +344,14 @@ def get_content_length() -> Optional[int]: self._upgraded = msg.upgrade method = getattr(msg, "method", self.method) - # assert method is not None - method = cast(str, method) # code is only present on responses code = getattr(msg, "code", 0) assert self.protocol is not None # calculate payload - empty_body = must_be_empty_body(method, code) + empty_body = status_code_must_be_empty_body(code) + if method and method_must_be_empty_body(method): + empty_body = True if not empty_body and ( (length is not None and length > 0) or msg.chunked From 35e7fd905632e14bebe27bce6fdc0537fc06fe0d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:51:34 -0700 Subject: [PATCH 21/26] method not always there --- aiohttp/http_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 28d9172bfa4..9fb43e04bda 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -350,7 +350,11 @@ def get_content_length() -> Optional[int]: assert self.protocol is not None # calculate payload empty_body = status_code_must_be_empty_body(code) - if method and method_must_be_empty_body(method): + if ( + not empty_body + and method + and method_must_be_empty_body(method) + ): empty_body = True if not empty_body and ( (length is not None and length > 0) From 6d44a3751d7f672ea21e9899c66d80ec47c1e8db Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 03:52:06 -0700 Subject: [PATCH 22/26] method not always there --- aiohttp/http_parser.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 9fb43e04bda..e862e39f7b0 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -349,13 +349,9 @@ def get_content_length() -> Optional[int]: assert self.protocol is not None # calculate payload - empty_body = status_code_must_be_empty_body(code) - if ( - not empty_body - and method - and method_must_be_empty_body(method) - ): - empty_body = True + empty_body = status_code_must_be_empty_body(code) or bool( + method and method_must_be_empty_body(method) + ) if not empty_body and ( (length is not None and length > 0) or msg.chunked From 369d5c6978f8a728a717e90b435e8295572b1327 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 04:11:16 -0700 Subject: [PATCH 23/26] remove now unused --- aiohttp/helpers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index ec7ae175fea..688471beda2 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1065,12 +1065,6 @@ def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: return None -def must_be_empty_body(method: str, code: int) -> bool: - """Check if a request must return an empty body.""" - # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3 - return status_code_must_be_empty_body(code) or method_must_be_empty_body(method) - - def method_must_be_empty_body(method: str) -> bool: """Check if a method must return an empty body.""" # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 From 625dda51b88962db28e2e425b8b8163c52737df4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Oct 2023 09:11:56 -0700 Subject: [PATCH 24/26] timeline --- CHANGES/7755.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/7755.bugfix diff --git a/CHANGES/7755.bugfix b/CHANGES/7755.bugfix new file mode 100644 index 00000000000..40945aecf3f --- /dev/null +++ b/CHANGES/7755.bugfix @@ -0,0 +1 @@ +Fix py http parser not treating 204/304/1xx as an empty body From 935b6fba0f008ddea2af0d257589a1e3b42438c3 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 30 Oct 2023 16:57:04 +0000 Subject: [PATCH 25/26] Update test_http_parser.py --- tests/test_http_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index b5915df463c..17976cf6678 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -1141,7 +1141,7 @@ def test_parse_content_length_than_chunked_payload(response: Any) -> None: assert b"second" == b"".join(d for d in payload._buffer) -@pytest.mark.parametrize("code", [204, 304, 101, 102]) +@pytest.mark.parametrize("code", (204, 304, 101, 102)) def test_parse_chunked_payload_empty_body_than_another_chunked( response: Any, code: int ) -> None: From 8e75ea63ae3ace53611d1dbf86b25ce67d76ebf0 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 30 Oct 2023 16:57:31 +0000 Subject: [PATCH 26/26] Update helpers.py --- aiohttp/helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index f79bb839510..27bcc641734 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -1074,6 +1074,5 @@ def method_must_be_empty_body(method: str) -> bool: def status_code_must_be_empty_body(code: int) -> bool: """Check if a status code must return an empty body.""" - # 204, 304, 1xx should not have a body per # https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 return code in {204, 304} or 100 <= code < 200