From 92ee27eb8d611978b7f328fe79d078216c8e18d9 Mon Sep 17 00:00:00 2001
From: Andrew Svetlov <andrew.svetlov@gmail.com>
Date: Sat, 19 May 2018 00:49:28 +0300
Subject: [PATCH] Optimize HTTP parser (#3015)

* Don't use a temporary list of pairs for HTTP headers creation

* Don't recode headers twice

* Make sure that HTTP headers returned by a peer are always immutable

* Add changelog
---
 CHANGES/3015.feature         |  1 +
 aiohttp/_http_parser.pyx     | 10 +++++-----
 aiohttp/client_reqrep.py     |  4 ++--
 aiohttp/http_parser.py       | 11 ++++++-----
 aiohttp/test_utils.py        |  6 +++---
 aiohttp/web_request.py       |  3 ++-
 tests/test_test_utils.py     |  4 ++--
 tests/test_web_functional.py | 16 +++++++++++++++-
 8 files changed, 36 insertions(+), 19 deletions(-)
 create mode 100644 CHANGES/3015.feature

diff --git a/CHANGES/3015.feature b/CHANGES/3015.feature
new file mode 100644
index 00000000000..2bae19dec05
--- /dev/null
+++ b/CHANGES/3015.feature
@@ -0,0 +1 @@
+Optimize HTTP parser
\ No newline at end of file
diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx
index 1c47350a660..f16e4220c47 100644
--- a/aiohttp/_http_parser.pyx
+++ b/aiohttp/_http_parser.pyx
@@ -7,7 +7,7 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \
                      Py_buffer, PyBytes_AsString
 
-from multidict import CIMultiDict
+from multidict import CIMultiDict, CIMultiDictProxy
 from yarl import URL
 
 from aiohttp import hdrs
@@ -53,7 +53,7 @@ cdef class HttpParser:
         bytearray   _buf
         str     _path
         str     _reason
-        list    _headers
+        object  _headers
         list    _raw_headers
         bint    _upgraded
         list    _messages
@@ -132,7 +132,7 @@ cdef class HttpParser:
             value = self._header_value
 
             self._header_name = self._header_value = None
-            self._headers.append((name, value))
+            self._headers.add(name, value)
 
             raw_name = self._raw_header_name
             raw_value = self._raw_header_value
@@ -174,7 +174,7 @@ cdef class HttpParser:
         chunked = bool(self._cparser.flags & cparser.F_CHUNKED)
 
         raw_headers = tuple(self._raw_headers)
-        headers = CIMultiDict(self._headers)
+        headers = CIMultiDictProxy(self._headers)
 
         if upgrade or self._cparser.method == 5: # cparser.CONNECT:
             self._upgraded = True
@@ -356,7 +356,7 @@ cdef int cb_on_message_begin(cparser.http_parser* parser) except -1:
     cdef HttpParser pyparser = <HttpParser>parser.data
 
     pyparser._started = True
-    pyparser._headers = []
+    pyparser._headers = CIMultiDict()
     pyparser._raw_headers = []
     pyparser._buf.clear()
     pyparser._path = None
diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py
index 1838231184d..6fe8f16ac63 100644
--- a/aiohttp/client_reqrep.py
+++ b/aiohttp/client_reqrep.py
@@ -769,8 +769,8 @@ async def start(self, connection):
         self.reason = message.reason
 
         # headers
-        self._headers = CIMultiDictProxy(message.headers)
-        self._raw_headers = tuple(message.raw_headers)
+        self._headers = message.headers  # type is CIMultiDictProxy
+        self._raw_headers = message.raw_headers  # type is Tuple[bytes, bytes]
 
         # payload
         self.content = payload
diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
index 89571960b3a..c35fcf53fa5 100644
--- a/aiohttp/http_parser.py
+++ b/aiohttp/http_parser.py
@@ -4,7 +4,7 @@
 import zlib
 from enum import IntEnum
 
-from multidict import CIMultiDict
+from multidict import CIMultiDict, CIMultiDictProxy
 from yarl import URL
 
 from . import hdrs
@@ -324,6 +324,7 @@ def parse_headers(self, lines):
         upgrade = False
         chunked = False
         raw_headers = tuple(raw_headers)
+        headers = CIMultiDictProxy(headers)
 
         # keep-alive
         conn = headers.get(hdrs.CONNECTION)
@@ -385,8 +386,8 @@ def parse_message(self, lines):
             raise BadStatusLine(version)
 
         # read headers
-        headers, raw_headers, \
-            close, compression, upgrade, chunked = self.parse_headers(lines)
+        (headers, raw_headers,
+         close, compression, upgrade, chunked) = self.parse_headers(lines)
 
         if close is None:  # then the headers weren't set in the request
             if version <= HttpVersion10:  # HTTP 1.0 must asks to not close
@@ -438,8 +439,8 @@ def parse_message(self, lines):
             raise BadStatusLine(line)
 
         # read headers
-        headers, raw_headers, \
-            close, compression, upgrade, chunked = self.parse_headers(lines)
+        (headers, raw_headers,
+         close, compression, upgrade, chunked) = self.parse_headers(lines)
 
         if close is None:
             close = version <= HttpVersion10
diff --git a/aiohttp/test_utils.py b/aiohttp/test_utils.py
index c6c1ada9ac3..510dcb2c428 100644
--- a/aiohttp/test_utils.py
+++ b/aiohttp/test_utils.py
@@ -10,7 +10,7 @@
 from abc import ABC, abstractmethod
 from unittest import mock
 
-from multidict import CIMultiDict
+from multidict import CIMultiDict, CIMultiDictProxy
 from yarl import URL
 
 import aiohttp
@@ -489,11 +489,11 @@ def make_mocked_request(method, path, headers=None, *,
         closing = True
 
     if headers:
-        headers = CIMultiDict(headers)
+        headers = CIMultiDictProxy(CIMultiDict(headers))
         raw_hdrs = tuple(
             (k.encode('utf-8'), v.encode('utf-8')) for k, v in headers.items())
     else:
-        headers = CIMultiDict()
+        headers = CIMultiDictProxy(CIMultiDict())
         raw_hdrs = ()
 
     chunked = 'chunked' in headers.get(hdrs.TRANSFER_ENCODING, '').lower()
diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py
index 17e9e43771d..951ab2a5dae 100644
--- a/aiohttp/web_request.py
+++ b/aiohttp/web_request.py
@@ -131,7 +131,8 @@ def clone(self, *, method=sentinel, rel_url=sentinel,
             dct['url'] = rel_url
             dct['path'] = str(rel_url)
         if headers is not sentinel:
-            dct['headers'] = CIMultiDict(headers)
+            # a copy semantic
+            dct['headers'] = CIMultiDictProxy(CIMultiDict(headers))
             dct['raw_headers'] = tuple((k.encode('utf-8'), v.encode('utf-8'))
                                        for k, v in headers.items())
 
diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py
index 0afa5f63bf6..a45e7cab753 100644
--- a/tests/test_test_utils.py
+++ b/tests/test_test_utils.py
@@ -2,7 +2,7 @@
 from unittest import mock
 
 import pytest
-from multidict import CIMultiDict
+from multidict import CIMultiDict, CIMultiDictProxy
 from yarl import URL
 
 import aiohttp
@@ -170,7 +170,7 @@ def test_make_mocked_request(headers):
     assert req.method == "GET"
     assert req.path == "/"
     assert isinstance(req, web.Request)
-    assert isinstance(req.headers, CIMultiDict)
+    assert isinstance(req.headers, CIMultiDictProxy)
 
 
 def test_make_mocked_request_sslcontext():
diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py
index 9354a06fb75..9496629829c 100644
--- a/tests/test_web_functional.py
+++ b/tests/test_web_functional.py
@@ -8,7 +8,7 @@
 
 import pytest
 from async_generator import async_generator, yield_
-from multidict import MultiDict
+from multidict import CIMultiDictProxy, MultiDict
 from yarl import URL
 
 import aiohttp
@@ -1822,3 +1822,17 @@ async def handler(request):
     client = await aiohttp_client(app)
     resp = await client.get('/get')
     assert resp.status == 200
+
+
+async def test_request_headers_type(aiohttp_client):
+
+    async def handler(request):
+        assert isinstance(request.headers, CIMultiDictProxy)
+        return web.Response()
+
+    app = web.Application()
+    app.add_routes([web.get('/get', handler)])
+
+    client = await aiohttp_client(app)
+    resp = await client.get('/get')
+    assert resp.status == 200