diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index bfa47d78d07..130a418f065 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -48,6 +48,7 @@ cdef class HttpParser: bint _started object _url + bytearray _buf str _path str _reason list _headers @@ -92,6 +93,7 @@ cdef class HttpParser: self._loop = loop self._timer = timer + self._buf = bytearray() self._payload = None self._payload_error = 0 self._payload_exception = payload_exception @@ -223,6 +225,9 @@ cdef class HttpParser: cdef _on_chunk_complete(self): self._payload.end_http_chunk_receiving() + cdef object _on_status_complete(self): + pass + ### Public API ### def http_version(self): @@ -311,6 +316,22 @@ cdef class HttpRequestParserC(HttpParser): max_line_size, max_headers, max_field_size, payload_exception, response_with_body) + cdef object _on_status_complete(self): + cdef Py_buffer py_buf + if not self._buf: + return + self._path = self._buf.decode('utf-8', 'surrogateescape') + if self._cparser.method == 5: # CONNECT + self._url = URL(self._path) + else: + PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE) + try: + self._url = _parse_url(py_buf.buf, + py_buf.len) + finally: + PyBuffer_Release(&py_buf) + self._buf.clear() + cdef class HttpResponseParserC(HttpParser): @@ -323,6 +344,11 @@ cdef class HttpResponseParserC(HttpParser): max_line_size, max_headers, max_field_size, payload_exception, response_with_body, auto_decompress) + cdef object _on_status_complete(self): + if self._buf: + self._reason = self._buf.decode('utf-8', 'surrogateescape') + self._buf.clear() + cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data @@ -330,25 +356,20 @@ cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: pyparser._started = True pyparser._headers = [] pyparser._raw_headers = [] + pyparser._buf.clear() + pyparser._path = None + pyparser._reason = None return 0 cdef int cb_on_url(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data - cdef str path = None try: if length > pyparser._max_line_size: raise LineTooLong( 'Status line is too long', pyparser._max_line_size) - - path = at[:length].decode('utf-8', 'surrogateescape') - pyparser._path = path - - if pyparser._cparser.method == 5: # CONNECT - pyparser._url = URL(path) - else: - pyparser._url = _parse_url(at[:length], length) + pyparser._buf.extend(at[:length]) except BaseException as ex: pyparser._last_error = ex return -1 @@ -359,11 +380,12 @@ cdef int cb_on_url(cparser.http_parser* parser, cdef int cb_on_status(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data + cdef str reason try: if length > pyparser._max_line_size: raise LineTooLong( 'Status line is too long', pyparser._max_line_size) - pyparser._reason = at[:length].decode('utf-8', 'surrogateescape') + pyparser._buf.extend(at[:length]) except BaseException as ex: pyparser._last_error = ex return -1 @@ -375,6 +397,7 @@ cdef int cb_on_header_field(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data try: + pyparser._on_status_complete() if length > pyparser._max_field_size: raise LineTooLong( 'Header name is too long', pyparser._max_field_size) @@ -410,6 +433,7 @@ cdef int cb_on_header_value(cparser.http_parser* parser, cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data try: + pyparser._on_status_complete() pyparser._on_headers_complete() except BaseException as exc: pyparser._last_error = exc diff --git a/changes/2311.bugfix b/changes/2311.bugfix new file mode 100644 index 00000000000..61f0580d065 --- /dev/null +++ b/changes/2311.bugfix @@ -0,0 +1,2 @@ +Fix C HTTP parser for cases when status line is split into different +TCP packets. diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index cd028f724de..2b945fa80ea 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -617,6 +617,20 @@ def test_parse_no_length_payload(parser): assert payload.is_eof() +def test_partial_url(parser): + messages, upgrade, tail = parser.feed_data(b'GET /te') + assert len(messages) == 0 + messages, upgrade, tail = parser.feed_data(b'st HTTP/1.1\r\n\r\n') + assert len(messages) == 1 + + msg, payload = messages[0] + + assert msg.method == 'GET' + assert msg.path == '/test' + assert msg.version == (1, 1) + assert payload.is_eof() + + class TestParsePayload(unittest.TestCase): def setUp(self):