From 80831fc587817603899fb585d654fd9dced27b40 Mon Sep 17 00:00:00 2001 From: Kimmo Parviainen-Jalanko Date: Thu, 27 Oct 2016 13:09:31 +0300 Subject: [PATCH] Fixed JSON response character set detection. RFC 7159 states that JSON *MUST* be encoded in UTF-8, UTF-16, or UTF-32, and that the default encoding, in the absence of a charset in the Content-Type header, is UTF-8. This vastly improves performance when doing many small requests in environments where the server does not specify the character set, because the chardet detection step is skipped. --- CONTRIBUTORS.txt | 1 + aiohttp/client_reqrep.py | 6 +++++- tests/test_client_response.py | 21 +-------------------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 14a4d929690..5c8c7750ff4 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -76,6 +76,7 @@ Julien Duponchelle Junjie Tao Justas Trimailovas Kay Zheng +Kimmo Parviainen-Jalanko Kirill Klenov Kirill Malovitsa Kyrylo Perevozchikov diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index 57cd2e8e1e5..2972050d02d 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -728,7 +728,11 @@ def _get_encoding(self): encoding = params.get('charset') if not encoding: - encoding = chardet.detect(self._content)['encoding'] + if mtype == 'application' and stype == 'json': + # RFC 7159 states that the default encoding is UTF-8. 
+ encoding = 'utf-8' + else: + encoding = chardet.detect(self._content)['encoding'] if not encoding: encoding = 'utf-8' diff --git a/tests/test_client_response.py b/tests/test_client_response.py index 5ba9c3d4c28..646918067b3 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -171,7 +171,7 @@ def side_effect(*args, **kwargs): fut.set_result('{"тест": "пройден"}'.encode('cp1251')) return fut - response.headers = {'Content-Type': 'application/json'} + response.headers = {'Content-Type': 'text/plain'} content = response.content = mock.Mock() content.read.side_effect = side_effect @@ -275,25 +275,6 @@ def side_effect(*args, **kwargs): assert not response._get_encoding.called -@asyncio.coroutine -def test_json_detect_encoding(loop): - response = ClientResponse('get', URL('http://def-cl-resp.org')) - response._post_init(loop) - - def side_effect(*args, **kwargs): - fut = helpers.create_future(loop) - fut.set_result('{"тест": "пройден"}'.encode('cp1251')) - return fut - - response.headers = {'Content-Type': 'application/json'} - content = response.content = mock.Mock() - content.read.side_effect = side_effect - - res = yield from response.json() - assert res == {'тест': 'пройден'} - assert response._connection is None - - def test_override_flow_control(loop): class MyResponse(ClientResponse): flow_control_class = aiohttp.StreamReader