From a66b964ecc5f56a21caa91efe493102949b85eef Mon Sep 17 00:00:00 2001 From: Nikolay Kim Date: Mon, 7 Jul 2014 16:14:01 -0700 Subject: [PATCH] add chardet support to ClientResponse.text() and ClientResponse.json() #105 --- aiohttp/client.py | 29 ++++++++++++++++++++--------- setup.py | 2 +- tests/test_client.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/aiohttp/client.py b/aiohttp/client.py index 28d4503a03f..84e612c67d5 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -15,6 +15,10 @@ import urllib.parse import weakref import warnings +try: + import chardet +except ImportError: # pragma: no cover + chardet = None import aiohttp from . import helpers, streams @@ -701,17 +705,26 @@ def read_and_close(self, decode=False): ) return (yield from self.read(decode)) + def _get_encoding(self, encoding): + ctype = self.headers.get('CONTENT-TYPE', '').lower() + mtype, stype, _, params = helpers.parse_mimetype(ctype) + + if not encoding: + encoding = params.get('charset') + if not encoding and chardet: + encoding = chardet.detect(self._content)['encoding'] + if not encoding: # pragma: no cover + encoding = 'utf-8' + + return encoding + @asyncio.coroutine def text(self, encoding=None): """Read response payload and decode.""" if self._content is None: yield from self.read() - ctype = self.headers.get('CONTENT-TYPE', '').lower() - mtype, stype, _, params = helpers.parse_mimetype(ctype) - - encoding = encoding or params.get('charset', 'utf-8') - return self._content.decode(encoding) + return self._content.decode(self._get_encoding(encoding)) @asyncio.coroutine def json(self, *, encoding=None, loads=json.loads): @@ -720,16 +733,14 @@ def json(self, *, encoding=None, loads=json.loads): yield from self.read() ctype = self.headers.get('CONTENT-TYPE', '').lower() - mtype, stype, _, params = helpers.parse_mimetype(ctype) - if not (mtype == 'application' or stype == 'json'): + if 'json' not in ctype: client_log.warning( 'Attempt to decode JSON with unexpected mimetype: %s', ctype) if not self._content.strip(): return None - encoding = encoding or params.get('charset', 'utf-8') - return loads(self._content.decode(encoding)) + return loads(self._content.decode(self._get_encoding(encoding))) class HttpClient: diff --git a/setup.py b/setup.py index 1b4daf195fa..8b119637e42 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ else: install_requires = ['asyncio'] -tests_require = install_requires + ['nose', 'gunicorn'] +tests_require = install_requires + ['nose', 'gunicorn', 'chardet'] def read(f): diff --git a/tests/test_client.py b/tests/test_client.py index 00b41dc3004..7ffec8c3ce2 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -173,6 +173,23 @@ def second_call(*args, **kwargs): self.assertEqual(res, '{"тест": "пройден"}') self.assertTrue(self.response.close.called) + def test_text_detect_encoding(self): + def side_effect(*args, **kwargs): + def second_call(*args, **kwargs): + raise aiohttp.EofStream + fut = asyncio.Future(loop=self.loop) + fut.set_result('{"тест": "пройден"}'.encode('cp1251')) + content.read.side_effect = second_call + return fut + self.response.headers = {'CONTENT-TYPE': 'application/json'} + content = self.response.content = unittest.mock.Mock() + content.read.side_effect = side_effect + self.response.close = unittest.mock.Mock() + + res = self.loop.run_until_complete(self.response.text()) + self.assertEqual(res, '{"тест": "пройден"}') + self.assertTrue(self.response.close.called) + def test_json(self): def side_effect(*args, **kwargs): def second_call(*args, **kwargs): @@ -230,6 +247,23 @@ def second_call(*args, **kwargs): self.assertEqual(res, {'тест': 'пройден'}) self.assertTrue(self.response.close.called) + def test_json_detect_encoding(self): + def side_effect(*args, **kwargs): + def second_call(*args, **kwargs): + raise aiohttp.EofStream + fut = asyncio.Future(loop=self.loop) + fut.set_result('{"тест": "пройден"}'.encode('cp1251')) + content.read.side_effect = second_call + return fut + self.response.headers = {'CONTENT-TYPE': 'application/json'} + content = self.response.content = unittest.mock.Mock() + content.read.side_effect = side_effect + self.response.close = unittest.mock.Mock() + + res = self.loop.run_until_complete(self.response.json()) + self.assertEqual(res, {'тест': 'пройден'}) + self.assertTrue(self.response.close.called) + def test_override_flow_control(self): class MyResponse(ClientResponse): flow_control_class = aiohttp.FlowControlDataQueue