diff --git a/CHANGES/2549.feature b/CHANGES/2549.feature new file mode 100644 index 00000000000..680d8a1e63b --- /dev/null +++ b/CHANGES/2549.feature @@ -0,0 +1,2 @@ +Make the `aiohttp.ClientResponse.get_encoding` method public with +the processing of invalid charset while detecting content encoding. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 6a50505ef96..51f79744bc4 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -122,6 +122,7 @@ Ludovic Gasc Lukasz Marcin Dobrzanski Makc Belousow Manuel Miranda +Marat Sharafutdinov Marco Paolini Mariano Anaya Martin Melka diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index 35695de98cc..d6429d6531e 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -1,4 +1,5 @@ import asyncio +import codecs import collections import io import json @@ -756,11 +757,16 @@ async def read(self): return self._content - def _get_encoding(self): + def get_encoding(self): ctype = self.headers.get(hdrs.CONTENT_TYPE, '').lower() mimetype = helpers.parse_mimetype(ctype) encoding = mimetype.parameters.get('charset') + if encoding: + try: + codecs.lookup(encoding) + except LookupError: + encoding = None if not encoding: if mimetype.type == 'application' and mimetype.subtype == 'json': # RFC 7159 states that the default encoding is UTF-8. 
@@ -778,7 +784,7 @@ async def text(self, encoding=None, errors='strict'): await self.read() if encoding is None: - encoding = self._get_encoding() + encoding = self.get_encoding() return self._content.decode(encoding, errors=errors) @@ -803,7 +809,7 @@ async def json(self, *, encoding=None, loads=json.loads, return None if encoding is None: - encoding = self._get_encoding() + encoding = self.get_encoding() return loads(stripped.decode(encoding)) diff --git a/docs/client_reference.rst b/docs/client_reference.rst index a91c9241424..572a3567229 100644 --- a/docs/client_reference.rst +++ b/docs/client_reference.rst @@ -1162,6 +1162,14 @@ Response object A namedtuple with request URL and headers from :class:`ClientRequest` object, :class:`aiohttp.RequestInfo` instance. + .. method:: get_encoding() + + Automatically detect content encoding using ``charset`` info in + ``Content-Type`` HTTP header. If this info does not exist or there + are no appropriate codecs for encoding then :term:`cchardet` / + :term:`chardet` is used. + + .. 
versionadded:: 3.0 ClientWebSocketResponse diff --git a/tests/test_client_response.py b/tests/test_client_response.py index 4765265e058..602691cc5ab 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -256,12 +256,12 @@ def side_effect(*args, **kwargs): 'Content-Type': 'application/json'} content = response.content = mock.Mock() content.read.side_effect = side_effect - response._get_encoding = mock.Mock() + response.get_encoding = mock.Mock() res = await response.text(encoding='cp1251') assert res == '{"тест": "пройден"}' assert response._connection is None - assert not response._get_encoding.called + assert not response.get_encoding.called async def test_text_detect_encoding(loop, session): @@ -283,6 +283,26 @@ def side_effect(*args, **kwargs): assert response._connection is None +async def test_text_detect_encoding_if_invalid_charset(loop, session): + response = ClientResponse('get', URL('http://def-cl-resp.org')) + response._post_init(loop, session) + + def side_effect(*args, **kwargs): + fut = loop.create_future() + fut.set_result('{"тест": "пройден"}'.encode('cp1251')) + return fut + + response.headers = {'Content-Type': 'text/plain;charset=invalid'} + content = response.content = mock.Mock() + content.read.side_effect = side_effect + + await response.read() + res = await response.text() + assert res == '{"тест": "пройден"}' + assert response._connection is None + assert response.get_encoding().lower() == 'windows-1251' + + async def test_text_after_read(loop, session): response = ClientResponse('get', URL('http://def-cl-resp.org')) response._post_init(loop, session) @@ -372,12 +392,12 @@ def side_effect(*args, **kwargs): 'Content-Type': 'application/json;charset=utf8'} content = response.content = mock.Mock() content.read.side_effect = side_effect - response._get_encoding = mock.Mock() + response.get_encoding = mock.Mock() res = await response.json(encoding='cp1251') assert res == {'тест': 'пройден'} assert response._connection 
is None - assert not response._get_encoding.called + assert not response.get_encoding.called @pytest.mark.xfail @@ -398,7 +418,7 @@ def test_get_encoding_unknown(loop, session): response.headers = {'Content-Type': 'application/json'} with mock.patch('aiohttp.client_reqrep.chardet') as m_chardet: m_chardet.detect.return_value = {'encoding': None} - assert response._get_encoding() == 'utf-8' + assert response.get_encoding() == 'utf-8' def test_raise_for_status_2xx():